Merge branch 'next' into for-linus
[platform/kernel/linux-starfive.git] / kernel / sysctl.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21
22 #include <linux/module.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/panic.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/filter.h>
36 #include <linux/fs.h>
37 #include <linux/init.h>
38 #include <linux/kernel.h>
39 #include <linux/kobject.h>
40 #include <linux/net.h>
41 #include <linux/sysrq.h>
42 #include <linux/highuid.h>
43 #include <linux/writeback.h>
44 #include <linux/ratelimit.h>
45 #include <linux/hugetlb.h>
46 #include <linux/initrd.h>
47 #include <linux/key.h>
48 #include <linux/times.h>
49 #include <linux/limits.h>
50 #include <linux/dcache.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/oom.h>
59 #include <linux/kmod.h>
60 #include <linux/capability.h>
61 #include <linux/binfmts.h>
62 #include <linux/sched/sysctl.h>
63 #include <linux/mount.h>
64 #include <linux/userfaultfd_k.h>
65 #include <linux/pid.h>
66
67 #include "../lib/kstrtox.h"
68
69 #include <linux/uaccess.h>
70 #include <asm/processor.h>
71
72 #ifdef CONFIG_X86
73 #include <asm/nmi.h>
74 #include <asm/stacktrace.h>
75 #include <asm/io.h>
76 #endif
77 #ifdef CONFIG_SPARC
78 #include <asm/setup.h>
79 #endif
80 #ifdef CONFIG_RT_MUTEXES
81 #include <linux/rtmutex.h>
82 #endif
83
/*
 * shared constants to be used in various sysctls
 *
 * NOTE(review): presumably other code refers to these entries by their
 * position in the array, so the order of the values should not be
 * changed without checking the users - confirm.
 */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
EXPORT_SYMBOL(sysctl_vals);

/* unsigned long counterpart of the table above: 0, 1 and LONG_MAX */
const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX };
EXPORT_SYMBOL_GPL(sysctl_long_vals);
90
91 #if defined(CONFIG_SYSCTL)
92
/* Constants used for minimum and maximum */

#ifdef CONFIG_PERF_EVENTS
/* 640 * 1024; only built when CONFIG_PERF_EVENTS is enabled */
static const int six_hundred_forty_kb = 640 * 1024;
#endif


/*
 * File-local copies of NGROUPS_MAX and CAP_LAST_CAP.
 * NOTE(review): presumably kept as objects so that sysctl table entries
 * can point at them - confirm against the tables using them.
 */
static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
102
103 #ifdef CONFIG_PROC_SYSCTL
104
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 *      to be written, and multiple writes on the same sysctl file descriptor
 *      will rewrite the sysctl value, regardless of file position. No warning
 *      is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 *      not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *      file position 0 and the value must be fully contained in the buffer
 *      sent to the write syscall. If dealing with strings respect the file
 *      position, but restrict this to the max length of the buffer, anything
 *      past the max length will be ignored. Multiple writes will append
 *      to the buffer.
 *
 * These write modes control how current file position affects the behavior of
 * updating sysctl values through the proc interface on each write.
 */
enum sysctl_writes_mode {
        SYSCTL_WRITES_LEGACY            = -1,
        SYSCTL_WRITES_WARN              = 0,
        SYSCTL_WRITES_STRICT            = 1,
};

/* Write mode currently in effect; strict is the default. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
131 #endif /* CONFIG_PROC_SYSCTL */
132
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
/*
 * Only defined when one of the two options above is set.
 * NOTE(review): presumably the backing store of a sysctl that selects the
 * legacy mmap layout - confirm against the users of this variable.
 */
int sysctl_legacy_va_layout;
#endif
137
138 #endif /* CONFIG_SYSCTL */
139
140 /*
141  * /proc/sys support
142  */
143
144 #ifdef CONFIG_PROC_SYSCTL
145
146 static int _proc_do_string(char *data, int maxlen, int write,
147                 char *buffer, size_t *lenp, loff_t *ppos)
148 {
149         size_t len;
150         char c, *p;
151
152         if (!data || !maxlen || !*lenp) {
153                 *lenp = 0;
154                 return 0;
155         }
156
157         if (write) {
158                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
159                         /* Only continue writes not past the end of buffer. */
160                         len = strlen(data);
161                         if (len > maxlen - 1)
162                                 len = maxlen - 1;
163
164                         if (*ppos > len)
165                                 return 0;
166                         len = *ppos;
167                 } else {
168                         /* Start writing from beginning of buffer. */
169                         len = 0;
170                 }
171
172                 *ppos += *lenp;
173                 p = buffer;
174                 while ((p - buffer) < *lenp && len < maxlen - 1) {
175                         c = *(p++);
176                         if (c == 0 || c == '\n')
177                                 break;
178                         data[len++] = c;
179                 }
180                 data[len] = 0;
181         } else {
182                 len = strlen(data);
183                 if (len > maxlen)
184                         len = maxlen;
185
186                 if (*ppos > len) {
187                         *lenp = 0;
188                         return 0;
189                 }
190
191                 data += *ppos;
192                 len  -= *ppos;
193
194                 if (len > *lenp)
195                         len = *lenp;
196                 if (len)
197                         memcpy(buffer, data, len);
198                 if (len < *lenp) {
199                         buffer[len] = '\n';
200                         len++;
201                 }
202                 *lenp = len;
203                 *ppos += len;
204         }
205         return 0;
206 }
207
208 static void warn_sysctl_write(struct ctl_table *table)
209 {
210         pr_warn_once("%s wrote to %s when file position was not 0!\n"
211                 "This will not be supported in the future. To silence this\n"
212                 "warning, set kernel.sysctl_writes_strict = -1\n",
213                 current->comm, table->procname);
214 }
215
216 /**
217  * proc_first_pos_non_zero_ignore - check if first position is allowed
218  * @ppos: file position
219  * @table: the sysctl table
220  *
221  * Returns true if the first position is non-zero and the sysctl_writes_strict
222  * mode indicates this is not allowed for numeric input types. String proc
223  * handlers can ignore the return value.
224  */
225 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
226                                            struct ctl_table *table)
227 {
228         if (!*ppos)
229                 return false;
230
231         switch (sysctl_writes_strict) {
232         case SYSCTL_WRITES_STRICT:
233                 return true;
234         case SYSCTL_WRITES_WARN:
235                 warn_sysctl_write(table);
236                 return false;
237         default:
238                 return false;
239         }
240 }
241
242 /**
243  * proc_dostring - read a string sysctl
244  * @table: the sysctl table
245  * @write: %TRUE if this is a write to the sysctl file
246  * @buffer: the user buffer
247  * @lenp: the size of the user buffer
248  * @ppos: file position
249  *
250  * Reads/writes a string from/to the user buffer. If the kernel
251  * buffer provided is not large enough to hold the string, the
252  * string is truncated. The copied string is %NULL-terminated.
253  * If the string is being read by the user process, it is copied
254  * and a newline '\n' is added. It is truncated if the buffer is
255  * not large enough.
256  *
257  * Returns 0 on success.
258  */
259 int proc_dostring(struct ctl_table *table, int write,
260                   void *buffer, size_t *lenp, loff_t *ppos)
261 {
262         if (write)
263                 proc_first_pos_non_zero_ignore(ppos, table);
264
265         return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
266                         ppos);
267 }
268
/*
 * Advance *buf past leading whitespace, decreasing *size by the number of
 * characters skipped. Stops at the first non-space or when *size hits 0.
 */
static void proc_skip_spaces(char **buf, size_t *size)
{
        for (; *size && isspace(**buf); (*buf)++)
                (*size)--;
}
278
/*
 * Advance *buf past a leading run of the character @v, decreasing *size by
 * the number of characters skipped. Stops at the first other character or
 * when *size hits 0.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        for (; *size && **buf == v; (*buf)++)
                (*size)--;
}
288
289 /**
290  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
291  *                   fail on overflow
292  *
293  * @cp: kernel buffer containing the string to parse
294  * @endp: pointer to store the trailing characters
295  * @base: the base to use
296  * @res: where the parsed integer will be stored
297  *
298  * In case of success 0 is returned and @res will contain the parsed integer,
299  * @endp will hold any trailing characters.
300  * This function will fail the parse on overflow. If there wasn't an overflow
301  * the function will defer the decision what characters count as invalid to the
302  * caller.
303  */
304 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
305                            unsigned long *res)
306 {
307         unsigned long long result;
308         unsigned int rv;
309
310         cp = _parse_integer_fixup_radix(cp, &base);
311         rv = _parse_integer(cp, base, &result);
312         if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
313                 return -ERANGE;
314
315         cp += rv;
316
317         if (endp)
318                 *endp = (char *)cp;
319
320         *res = (unsigned long)result;
321         return 0;
322 }
323
324 #define TMPBUFLEN 22
325 /**
326  * proc_get_long - reads an ASCII formatted integer from a user buffer
327  *
328  * @buf: a kernel buffer
329  * @size: size of the kernel buffer
330  * @val: this is where the number will be stored
331  * @neg: set to %TRUE if number is negative
332  * @perm_tr: a vector which contains the allowed trailers
333  * @perm_tr_len: size of the perm_tr vector
334  * @tr: pointer to store the trailer character
335  *
336  * In case of success %0 is returned and @buf and @size are updated with
337  * the amount of bytes read. If @tr is non-NULL and a trailing
338  * character exists (size is non-zero after returning from this
339  * function), @tr is updated with the trailing character.
340  */
341 static int proc_get_long(char **buf, size_t *size,
342                           unsigned long *val, bool *neg,
343                           const char *perm_tr, unsigned perm_tr_len, char *tr)
344 {
345         char *p, tmp[TMPBUFLEN];
346         ssize_t len = *size;
347
348         if (len <= 0)
349                 return -EINVAL;
350
351         if (len > TMPBUFLEN - 1)
352                 len = TMPBUFLEN - 1;
353
354         memcpy(tmp, *buf, len);
355
356         tmp[len] = 0;
357         p = tmp;
358         if (*p == '-' && *size > 1) {
359                 *neg = true;
360                 p++;
361         } else
362                 *neg = false;
363         if (!isdigit(*p))
364                 return -EINVAL;
365
366         if (strtoul_lenient(p, &p, 0, val))
367                 return -EINVAL;
368
369         len = p - tmp;
370
371         /* We don't know if the next char is whitespace thus we may accept
372          * invalid integers (e.g. 1234...a) or two integers instead of one
373          * (e.g. 123...1). So lets not allow such large numbers. */
374         if (len == TMPBUFLEN - 1)
375                 return -EINVAL;
376
377         if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
378                 return -EINVAL;
379
380         if (tr && (len < *size))
381                 *tr = *p;
382
383         *buf += len;
384         *size -= len;
385
386         return 0;
387 }
388
389 /**
390  * proc_put_long - converts an integer to a decimal ASCII formatted string
391  *
392  * @buf: the user buffer
393  * @size: the size of the user buffer
394  * @val: the integer to be converted
395  * @neg: sign of the number, %TRUE for negative
396  *
397  * In case of success @buf and @size are updated with the amount of bytes
398  * written.
399  */
400 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
401 {
402         int len;
403         char tmp[TMPBUFLEN], *p = tmp;
404
405         sprintf(p, "%s%lu", neg ? "-" : "", val);
406         len = strlen(tmp);
407         if (len > *size)
408                 len = *size;
409         memcpy(*buf, tmp, len);
410         *size -= len;
411         *buf += len;
412 }
413 #undef TMPBUFLEN
414
/*
 * Store @c at *buf and advance *buf by one byte, decreasing *size.
 * Does nothing when no space is left (*size == 0).
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
        char *dst;

        if (!*size)
                return;

        dst = *buf;
        *dst = c;

        *buf = dst + 1;
        (*size)--;
}
426
/*
 * Default converter between an int sysctl value and the sign/magnitude
 * pair used by the parsing and formatting helpers.
 *
 * Write: stores the value described by *negp / *lvalp into *valp, or
 * returns -EINVAL when it does not fit in an int.
 * Read: splits *valp into *negp and *lvalp.
 * @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        int cur;

        if (!write) {
                cur = READ_ONCE(*valp);
                *negp = cur < 0;
                *lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
                return 0;
        }

        if (*negp) {
                /* The magnitude of the most negative int is INT_MAX + 1. */
                if (*lvalp > (unsigned long) INT_MAX + 1)
                        return -EINVAL;
                WRITE_ONCE(*valp, -*lvalp);
                return 0;
        }

        if (*lvalp > (unsigned long) INT_MAX)
                return -EINVAL;
        WRITE_ONCE(*valp, *lvalp);
        return 0;
}
453
/*
 * Default converter between an unsigned int sysctl value and the unsigned
 * long used by the parsing and formatting helpers.
 *
 * Write: stores *lvalp into *valp, or returns -EINVAL when it exceeds
 * UINT_MAX. Read: copies *valp into *lvalp. @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
                                  unsigned int *valp,
                                  int write, void *data)
{
        if (!write) {
                *lvalp = (unsigned long)READ_ONCE(*valp);
                return 0;
        }

        if (*lvalp > UINT_MAX)
                return -EINVAL;

        WRITE_ONCE(*valp, *lvalp);
        return 0;
}
468
/*
 * Characters allowed to follow a number: space, tab and newline. Passed to
 * proc_get_long() as the list of permitted trailers.
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
470
/*
 * __do_proc_dointvec - read or write a vector of ints as ASCII text
 * @tbl_data: the int array backing the sysctl
 * @table: the sysctl table (maxlen gives the array size in bytes)
 * @write: non-zero to parse @buffer into the array, zero to format the array
 * @buffer: text buffer to parse from or format into
 * @lenp: in: size of @buffer; out: number of bytes consumed or produced
 * @ppos: file position, advanced by the final *@lenp
 * @conv: per-element converter; do_proc_dointvec_conv() is used when NULL
 * @data: opaque argument handed to @conv
 *
 * Returns 0 on success, the proc_get_long() error or -EINVAL when parsing
 * or conversion fails, and -EINVAL when a write did not process a single
 * element.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
                  int write, void *buffer,
                  size_t *lenp, loff_t *ppos,
                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
{
        int *i, vleft, first = 1, err = 0;
        size_t left;
        char *p;

        /*
         * Nothing to transfer: no backing data, empty buffer, or a read
         * that does not start at position 0.
         */
        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
                *lenp = 0;
                return 0;
        }

        i = (int *) tbl_data;
        /* Number of elements in the array. */
        vleft = table->maxlen / sizeof(*i);
        left = *lenp;

        if (!conv)
                conv = do_proc_dointvec_conv;

        if (write) {
                /*
                 * An ignored write still reports the whole buffer as
                 * consumed: "out" advances *ppos by *lenp and returns 0.
                 */
                if (proc_first_pos_non_zero_ignore(ppos, table))
                        goto out;

                /* Never parse more than PAGE_SIZE - 1 bytes of input. */
                if (left > PAGE_SIZE - 1)
                        left = PAGE_SIZE - 1;
                p = buffer;
        }

        /* One array element per iteration; first is cleared after each. */
        for (; left && vleft--; i++, first=0) {
                unsigned long lval;
                bool neg;

                if (write) {
                        proc_skip_spaces(&p, &left);

                        /* Only whitespace was left in the input. */
                        if (!left)
                                break;
                        err = proc_get_long(&p, &left, &lval, &neg,
                                             proc_wspace_sep,
                                             sizeof(proc_wspace_sep), NULL);
                        if (err)
                                break;
                        if (conv(&neg, &lval, i, 1, data)) {
                                err = -EINVAL;
                                break;
                        }
                } else {
                        if (conv(&neg, &lval, i, 0, data)) {
                                err = -EINVAL;
                                break;
                        }
                        /* Elements are separated by tabs on output. */
                        if (!first)
                                proc_put_char(&buffer, &left, '\t');
                        proc_put_long(&buffer, &left, lval, neg);
                }
        }

        /* Finish a successful, non-empty read with a newline if it fits. */
        if (!write && !first && left && !err)
                proc_put_char(&buffer, &left, '\n');
        /* Swallow whitespace following the last parsed value. */
        if (write && !err && left)
                proc_skip_spaces(&p, &left);
        /*
         * A write that completed no element fails; *lenp and *ppos are
         * left untouched on this path.
         */
        if (write && first)
                return err ? : -EINVAL;
        /* Report how many bytes were actually consumed or produced. */
        *lenp -= left;
out:
        *ppos += *lenp;
        return err;
}
543
/*
 * Run __do_proc_dointvec() on the array that table->data points to.
 * @conv and @data are passed through unchanged; a NULL @conv selects the
 * default converter inside __do_proc_dointvec().
 */
static int do_proc_dointvec(struct ctl_table *table, int write,
                  void *buffer, size_t *lenp, loff_t *ppos,
                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
{
        return __do_proc_dointvec(table->data, table, write,
                        buffer, lenp, ppos, conv, data);
}
553
554 static int do_proc_douintvec_w(unsigned int *tbl_data,
555                                struct ctl_table *table,
556                                void *buffer,
557                                size_t *lenp, loff_t *ppos,
558                                int (*conv)(unsigned long *lvalp,
559                                            unsigned int *valp,
560                                            int write, void *data),
561                                void *data)
562 {
563         unsigned long lval;
564         int err = 0;
565         size_t left;
566         bool neg;
567         char *p = buffer;
568
569         left = *lenp;
570
571         if (proc_first_pos_non_zero_ignore(ppos, table))
572                 goto bail_early;
573
574         if (left > PAGE_SIZE - 1)
575                 left = PAGE_SIZE - 1;
576
577         proc_skip_spaces(&p, &left);
578         if (!left) {
579                 err = -EINVAL;
580                 goto out_free;
581         }
582
583         err = proc_get_long(&p, &left, &lval, &neg,
584                              proc_wspace_sep,
585                              sizeof(proc_wspace_sep), NULL);
586         if (err || neg) {
587                 err = -EINVAL;
588                 goto out_free;
589         }
590
591         if (conv(&lval, tbl_data, 1, data)) {
592                 err = -EINVAL;
593                 goto out_free;
594         }
595
596         if (!err && left)
597                 proc_skip_spaces(&p, &left);
598
599 out_free:
600         if (err)
601                 return -EINVAL;
602
603         return 0;
604
605         /* This is in keeping with old __do_proc_dointvec() */
606 bail_early:
607         *ppos += *lenp;
608         return err;
609 }
610
611 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
612                                size_t *lenp, loff_t *ppos,
613                                int (*conv)(unsigned long *lvalp,
614                                            unsigned int *valp,
615                                            int write, void *data),
616                                void *data)
617 {
618         unsigned long lval;
619         int err = 0;
620         size_t left;
621
622         left = *lenp;
623
624         if (conv(&lval, tbl_data, 0, data)) {
625                 err = -EINVAL;
626                 goto out;
627         }
628
629         proc_put_long(&buffer, &left, lval, false);
630         if (!left)
631                 goto out;
632
633         proc_put_char(&buffer, &left, '\n');
634
635 out:
636         *lenp -= left;
637         *ppos += *lenp;
638
639         return err;
640 }
641
642 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
643                                int write, void *buffer,
644                                size_t *lenp, loff_t *ppos,
645                                int (*conv)(unsigned long *lvalp,
646                                            unsigned int *valp,
647                                            int write, void *data),
648                                void *data)
649 {
650         unsigned int *i, vleft;
651
652         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
653                 *lenp = 0;
654                 return 0;
655         }
656
657         i = (unsigned int *) tbl_data;
658         vleft = table->maxlen / sizeof(*i);
659
660         /*
661          * Arrays are not supported, keep this simple. *Do not* add
662          * support for them.
663          */
664         if (vleft != 1) {
665                 *lenp = 0;
666                 return -EINVAL;
667         }
668
669         if (!conv)
670                 conv = do_proc_douintvec_conv;
671
672         if (write)
673                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
674                                            conv, data);
675         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
676 }
677
/*
 * Run __do_proc_douintvec() on the value that table->data points to.
 * @conv and @data are passed through unchanged; a NULL @conv selects the
 * default converter inside __do_proc_douintvec().
 */
int do_proc_douintvec(struct ctl_table *table, int write,
                      void *buffer, size_t *lenp, loff_t *ppos,
                      int (*conv)(unsigned long *lvalp,
                                  unsigned int *valp,
                                  int write, void *data),
                      void *data)
{
        return __do_proc_douintvec(table->data, table, write,
                                   buffer, lenp, ppos, conv, data);
}
688
689 /**
690  * proc_dobool - read/write a bool
691  * @table: the sysctl table
692  * @write: %TRUE if this is a write to the sysctl file
693  * @buffer: the user buffer
694  * @lenp: the size of the user buffer
695  * @ppos: file position
696  *
697  * Reads/writes one integer value from/to the user buffer,
698  * treated as an ASCII string.
699  *
700  * table->data must point to a bool variable and table->maxlen must
701  * be sizeof(bool).
702  *
703  * Returns 0 on success.
704  */
705 int proc_dobool(struct ctl_table *table, int write, void *buffer,
706                 size_t *lenp, loff_t *ppos)
707 {
708         struct ctl_table tmp;
709         bool *data = table->data;
710         int res, val;
711
712         /* Do not support arrays yet. */
713         if (table->maxlen != sizeof(bool))
714                 return -EINVAL;
715
716         tmp = *table;
717         tmp.maxlen = sizeof(val);
718         tmp.data = &val;
719
720         val = READ_ONCE(*data);
721         res = proc_dointvec(&tmp, write, buffer, lenp, ppos);
722         if (res)
723                 return res;
724         if (write)
725                 WRITE_ONCE(*data, val);
726         return 0;
727 }
728
/**
 * proc_dointvec - read or write a vector of integers
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * No converter is supplied, so the default int conversion is used.
 *
 * Returns 0 on success.
 */
int proc_dointvec(struct ctl_table *table, int write, void *buffer,
                  size_t *lenp, loff_t *ppos)
{
        return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
}
747
/**
 * proc_douintvec - read or write a single unsigned integer
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes one unsigned integer value from/to the user buffer,
 * treated as an ASCII string. Despite the "vec" in the name, arrays are
 * not supported: the underlying helper returns -EINVAL unless
 * table->maxlen/sizeof(unsigned int) is exactly 1.
 *
 * Returns 0 on success.
 */
int proc_douintvec(struct ctl_table *table, int write, void *buffer,
                size_t *lenp, loff_t *ppos)
{
        return do_proc_douintvec(table, write, buffer, lenp, ppos,
                                 do_proc_douintvec_conv, NULL);
}
767
768 /*
769  * Taint values can only be increased
770  * This means we can safely use a temporary.
771  */
772 static int proc_taint(struct ctl_table *table, int write,
773                                void *buffer, size_t *lenp, loff_t *ppos)
774 {
775         struct ctl_table t;
776         unsigned long tmptaint = get_taint();
777         int err;
778
779         if (write && !capable(CAP_SYS_ADMIN))
780                 return -EPERM;
781
782         t = *table;
783         t.data = &tmptaint;
784         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
785         if (err < 0)
786                 return err;
787
788         if (write) {
789                 int i;
790
791                 /*
792                  * If we are relying on panic_on_taint not producing
793                  * false positives due to userspace input, bail out
794                  * before setting the requested taint flags.
795                  */
796                 if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
797                         return -EINVAL;
798
799                 /*
800                  * Poor man's atomic or. Not worth adding a primitive
801                  * to everyone's atomic.h for this
802                  */
803                 for (i = 0; i < TAINT_FLAGS_COUNT; i++)
804                         if ((1UL << i) & tmptaint)
805                                 add_taint(i, LOCKDEP_STILL_OK);
806         }
807
808         return err;
809 }
810
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};
824
825 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
826                                         int *valp,
827                                         int write, void *data)
828 {
829         int tmp, ret;
830         struct do_proc_dointvec_minmax_conv_param *param = data;
831         /*
832          * If writing, first do so via a temporary local int so we can
833          * bounds-check it before touching *valp.
834          */
835         int *ip = write ? &tmp : valp;
836
837         ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
838         if (ret)
839                 return ret;
840
841         if (write) {
842                 if ((param->min && *param->min > tmp) ||
843                     (param->max && *param->max < tmp))
844                         return -EINVAL;
845                 WRITE_ONCE(*valp, tmp);
846         }
847
848         return 0;
849 }
850
851 /**
852  * proc_dointvec_minmax - read a vector of integers with min/max values
853  * @table: the sysctl table
854  * @write: %TRUE if this is a write to the sysctl file
855  * @buffer: the user buffer
856  * @lenp: the size of the user buffer
857  * @ppos: file position
858  *
859  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
860  * values from/to the user buffer, treated as an ASCII string.
861  *
862  * This routine will ensure the values are within the range specified by
863  * table->extra1 (min) and table->extra2 (max).
864  *
865  * Returns 0 on success or -EINVAL on write when the range check fails.
866  */
867 int proc_dointvec_minmax(struct ctl_table *table, int write,
868                   void *buffer, size_t *lenp, loff_t *ppos)
869 {
870         struct do_proc_dointvec_minmax_conv_param param = {
871                 .min = (int *) table->extra1,
872                 .max = (int *) table->extra2,
873         };
874         return do_proc_dointvec(table, write, buffer, lenp, ppos,
875                                 do_proc_dointvec_minmax_conv, &param);
876 }
877
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min;
        unsigned int *max;
};
891
892 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
893                                          unsigned int *valp,
894                                          int write, void *data)
895 {
896         int ret;
897         unsigned int tmp;
898         struct do_proc_douintvec_minmax_conv_param *param = data;
899         /* write via temporary local uint for bounds-checking */
900         unsigned int *up = write ? &tmp : valp;
901
902         ret = do_proc_douintvec_conv(lvalp, up, write, data);
903         if (ret)
904                 return ret;
905
906         if (write) {
907                 if ((param->min && *param->min > tmp) ||
908                     (param->max && *param->max < tmp))
909                         return -ERANGE;
910
911                 WRITE_ONCE(*valp, tmp);
912         }
913
914         return 0;
915 }
916
/**
 * proc_douintvec_minmax - read or write an unsigned int with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes one unsigned integer value from/to the user buffer,
 * treated as an ASCII string. Negative strings are not allowed. Despite
 * the "vec" in the name, arrays are not supported: the underlying helper
 * returns -EINVAL unless table->maxlen/sizeof(unsigned int) is exactly 1.
 *
 * This routine will ensure the values are within the range specified by
 * table->extra1 (min) and table->extra2 (max). There is a final sanity
 * check for UINT_MAX to avoid having to support wrap around uses from
 * userspace.
 *
 * Returns 0 on success. The converter reports a failed range check as
 * -ERANGE, but the write helper turns every converter failure into
 * -EINVAL, so -EINVAL is what callers of this handler see.
 */
int proc_douintvec_minmax(struct ctl_table *table, int write,
                          void *buffer, size_t *lenp, loff_t *ppos)
{
        struct do_proc_douintvec_minmax_conv_param param = {
                .min = (unsigned int *) table->extra1,
                .max = (unsigned int *) table->extra2,
        };
        return do_proc_douintvec(table, write, buffer, lenp, ppos,
                                 do_proc_douintvec_minmax_conv, &param);
}
946
947 /**
948  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
949  * @table: the sysctl table
950  * @write: %TRUE if this is a write to the sysctl file
951  * @buffer: the user buffer
952  * @lenp: the size of the user buffer
953  * @ppos: file position
954  *
955  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
956  * values from/to the user buffer, treated as an ASCII string. Negative
957  * strings are not allowed.
958  *
959  * This routine will ensure the values are within the range specified by
960  * table->extra1 (min) and table->extra2 (max).
961  *
962  * Returns 0 on success or an error on write when the range check fails.
963  */
964 int proc_dou8vec_minmax(struct ctl_table *table, int write,
965                         void *buffer, size_t *lenp, loff_t *ppos)
966 {
967         struct ctl_table tmp;
968         unsigned int min = 0, max = 255U, val;
969         u8 *data = table->data;
970         struct do_proc_douintvec_minmax_conv_param param = {
971                 .min = &min,
972                 .max = &max,
973         };
974         int res;
975
976         /* Do not support arrays yet. */
977         if (table->maxlen != sizeof(u8))
978                 return -EINVAL;
979
980         if (table->extra1) {
981                 min = *(unsigned int *) table->extra1;
982                 if (min > 255U)
983                         return -EINVAL;
984         }
985         if (table->extra2) {
986                 max = *(unsigned int *) table->extra2;
987                 if (max > 255U)
988                         return -EINVAL;
989         }
990
991         tmp = *table;
992
993         tmp.maxlen = sizeof(val);
994         tmp.data = &val;
995         val = READ_ONCE(*data);
996         res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
997                                 do_proc_douintvec_minmax_conv, &param);
998         if (res)
999                 return res;
1000         if (write)
1001                 WRITE_ONCE(*data, val);
1002         return 0;
1003 }
1004 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1005
1006 #ifdef CONFIG_MAGIC_SYSRQ
1007 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
1008                                 void *buffer, size_t *lenp, loff_t *ppos)
1009 {
1010         int tmp, ret;
1011
1012         tmp = sysrq_mask();
1013
1014         ret = __do_proc_dointvec(&tmp, table, write, buffer,
1015                                lenp, ppos, NULL, NULL);
1016         if (ret || !write)
1017                 return ret;
1018
1019         if (write)
1020                 sysrq_toggle_support(tmp);
1021
1022         return 0;
1023 }
1024 #endif
1025
/*
 * Common worker for the unsigned long vector handlers.
 *
 * @data:    array of unsigned long that values are read from / stored to
 * @table:   supplies maxlen (array size in bytes) and the optional
 *           extra1 (min) / extra2 (max) bounds
 * @write:   non-zero to parse @buffer into @data, zero to format @data
 *           into @buffer
 * @buffer:  text holding the input, or receiving the output
 * @lenp:    in: bytes available in @buffer; out: bytes consumed/produced
 * @ppos:    file position; advanced by the final *lenp
 * @convmul: multiplier applied to parsed values before they are stored
 * @convdiv: divisor applied to parsed values before they are stored
 *
 * A written value is stored as convmul * val / convdiv; a stored value
 * is reported as convdiv * val / convmul. Values are tab-separated on
 * output and a trailing newline is appended if room remains.
 *
 * Returns 0 on success, or -EINVAL when a written value cannot be
 * parsed, is negative, falls outside the bounds, or when a write stores
 * no value at all.
 *
 * NOTE(review): convmul * val is not checked for overflow here.
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos,
		unsigned long convmul, unsigned long convdiv)
{
	unsigned long *i, *min, *max;
	int vleft, first = 1, err = 0;
	size_t left;
	char *p;

	/* Nothing to do: no storage, no room, or a read past position 0. */
	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = data;
	min = table->extra1;
	max = table->extra2;
	vleft = table->maxlen / sizeof(unsigned long);
	left = *lenp;

	if (write) {
		/*
		 * Taking this branch skips parsing; *lenp is left untouched,
		 * so the whole write is accounted as consumed at "out".
		 */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		/* At most PAGE_SIZE - 1 bytes of input are looked at. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	/* One iteration per array element; "first" drops after the first. */
	for (; left && vleft--; i++, first = 0) {
		unsigned long val;

		if (write) {
			bool neg;

			proc_skip_spaces(&p, &left);
			if (!left)
				break;

			err = proc_get_long(&p, &left, &val, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			/* Any parse error is reported as -EINVAL, as is a minus sign. */
			if (err || neg) {
				err = -EINVAL;
				break;
			}

			val = convmul * val / convdiv;
			/* Bounds apply to the converted value; NULL means unbounded. */
			if ((min && val < *min) || (max && val > *max)) {
				err = -EINVAL;
				break;
			}
			WRITE_ONCE(*i, val);
		} else {
			val = convdiv * READ_ONCE(*i) / convmul;
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, val, false);
		}
	}

	if (!write && !first && left && !err)
		proc_put_char(&buffer, &left, '\n');
	if (write && !err)
		proc_skip_spaces(&p, &left);
	/* A write that never completed a single element is an error. */
	if (write && first)
		return err ? : -EINVAL;
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
1098
1099 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1100                 void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1101                 unsigned long convdiv)
1102 {
1103         return __do_proc_doulongvec_minmax(table->data, table, write,
1104                         buffer, lenp, ppos, convmul, convdiv);
1105 }
1106
1107 /**
1108  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1109  * @table: the sysctl table
1110  * @write: %TRUE if this is a write to the sysctl file
1111  * @buffer: the user buffer
1112  * @lenp: the size of the user buffer
1113  * @ppos: file position
1114  *
1115  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1116  * values from/to the user buffer, treated as an ASCII string.
1117  *
1118  * This routine will ensure the values are within the range specified by
1119  * table->extra1 (min) and table->extra2 (max).
1120  *
1121  * Returns 0 on success.
1122  */
1123 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1124                            void *buffer, size_t *lenp, loff_t *ppos)
1125 {
1126     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1127 }
1128
1129 /**
1130  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1131  * @table: the sysctl table
1132  * @write: %TRUE if this is a write to the sysctl file
1133  * @buffer: the user buffer
1134  * @lenp: the size of the user buffer
1135  * @ppos: file position
1136  *
1137  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1138  * values from/to the user buffer, treated as an ASCII string. The values
1139  * are treated as milliseconds, and converted to jiffies when they are stored.
1140  *
1141  * This routine will ensure the values are within the range specified by
1142  * table->extra1 (min) and table->extra2 (max).
1143  *
1144  * Returns 0 on success.
1145  */
1146 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1147                                       void *buffer, size_t *lenp, loff_t *ppos)
1148 {
1149     return do_proc_doulongvec_minmax(table, write, buffer,
1150                                      lenp, ppos, HZ, 1000l);
1151 }
1152
1153
1154 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1155                                          int *valp,
1156                                          int write, void *data)
1157 {
1158         if (write) {
1159                 if (*lvalp > INT_MAX / HZ)
1160                         return 1;
1161                 if (*negp)
1162                         WRITE_ONCE(*valp, -*lvalp * HZ);
1163                 else
1164                         WRITE_ONCE(*valp, *lvalp * HZ);
1165         } else {
1166                 int val = READ_ONCE(*valp);
1167                 unsigned long lval;
1168                 if (val < 0) {
1169                         *negp = true;
1170                         lval = -(unsigned long)val;
1171                 } else {
1172                         *negp = false;
1173                         lval = (unsigned long)val;
1174                 }
1175                 *lvalp = lval / HZ;
1176         }
1177         return 0;
1178 }
1179
1180 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1181                                                 int *valp,
1182                                                 int write, void *data)
1183 {
1184         if (write) {
1185                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1186                         return 1;
1187                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1188         } else {
1189                 int val = *valp;
1190                 unsigned long lval;
1191                 if (val < 0) {
1192                         *negp = true;
1193                         lval = -(unsigned long)val;
1194                 } else {
1195                         *negp = false;
1196                         lval = (unsigned long)val;
1197                 }
1198                 *lvalp = jiffies_to_clock_t(lval);
1199         }
1200         return 0;
1201 }
1202
/*
 * Conversion callback translating between milliseconds (user side) and
 * jiffies (stored in *valp).
 *
 * On write, the parsed value is run through msecs_to_jiffies(); results
 * above INT_MAX are refused by returning 1. On read, the stored value is
 * split into sign and magnitude and passed through jiffies_to_msecs().
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int cur;

	if (write) {
		unsigned long ticks = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (ticks > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)ticks);
		return 0;
	}

	cur = READ_ONCE(*valp);
	*negp = cur < 0;
	magnitude = *negp ? -(unsigned long)cur : (unsigned long)cur;
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
1227
1228 static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1229                                                 int *valp, int write, void *data)
1230 {
1231         int tmp, ret;
1232         struct do_proc_dointvec_minmax_conv_param *param = data;
1233         /*
1234          * If writing, first do so via a temporary local int so we can
1235          * bounds-check it before touching *valp.
1236          */
1237         int *ip = write ? &tmp : valp;
1238
1239         ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1240         if (ret)
1241                 return ret;
1242
1243         if (write) {
1244                 if ((param->min && *param->min > tmp) ||
1245                                 (param->max && *param->max < tmp))
1246                         return -EINVAL;
1247                 *valp = tmp;
1248         }
1249         return 0;
1250 }
1251
1252 /**
1253  * proc_dointvec_jiffies - read a vector of integers as seconds
1254  * @table: the sysctl table
1255  * @write: %TRUE if this is a write to the sysctl file
1256  * @buffer: the user buffer
1257  * @lenp: the size of the user buffer
1258  * @ppos: file position
1259  *
1260  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1261  * values from/to the user buffer, treated as an ASCII string.
1262  * The values read are assumed to be in seconds, and are converted into
1263  * jiffies.
1264  *
1265  * Returns 0 on success.
1266  */
1267 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1268                           void *buffer, size_t *lenp, loff_t *ppos)
1269 {
1270     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1271                             do_proc_dointvec_jiffies_conv,NULL);
1272 }
1273
1274 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1275                           void *buffer, size_t *lenp, loff_t *ppos)
1276 {
1277         struct do_proc_dointvec_minmax_conv_param param = {
1278                 .min = (int *) table->extra1,
1279                 .max = (int *) table->extra2,
1280         };
1281         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1282                         do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1283 }
1284
1285 /**
1286  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1287  * @table: the sysctl table
1288  * @write: %TRUE if this is a write to the sysctl file
1289  * @buffer: the user buffer
1290  * @lenp: the size of the user buffer
1291  * @ppos: pointer to the file position
1292  *
1293  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1294  * values from/to the user buffer, treated as an ASCII string.
1295  * The values read are assumed to be in 1/USER_HZ seconds, and
1296  * are converted into jiffies.
1297  *
1298  * Returns 0 on success.
1299  */
1300 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1301                                  void *buffer, size_t *lenp, loff_t *ppos)
1302 {
1303         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1304                                 do_proc_dointvec_userhz_jiffies_conv, NULL);
1305 }
1306
1307 /**
1308  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1309  * @table: the sysctl table
1310  * @write: %TRUE if this is a write to the sysctl file
1311  * @buffer: the user buffer
1312  * @lenp: the size of the user buffer
1313  * @ppos: file position
1314  * @ppos: the current position in the file
1315  *
1316  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1317  * values from/to the user buffer, treated as an ASCII string.
1318  * The values read are assumed to be in 1/1000 seconds, and
1319  * are converted into jiffies.
1320  *
1321  * Returns 0 on success.
1322  */
1323 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1324                 size_t *lenp, loff_t *ppos)
1325 {
1326         return do_proc_dointvec(table, write, buffer, lenp, ppos,
1327                                 do_proc_dointvec_ms_jiffies_conv, NULL);
1328 }
1329
1330 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1331                 size_t *lenp, loff_t *ppos)
1332 {
1333         struct pid *new_pid;
1334         pid_t tmp;
1335         int r;
1336
1337         tmp = pid_vnr(cad_pid);
1338
1339         r = __do_proc_dointvec(&tmp, table, write, buffer,
1340                                lenp, ppos, NULL, NULL);
1341         if (r || !write)
1342                 return r;
1343
1344         new_pid = find_get_pid(tmp);
1345         if (!new_pid)
1346                 return -ESRCH;
1347
1348         put_pid(xchg(&cad_pid, new_pid));
1349         return 0;
1350 }
1351
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * table->data points at a pointer to the bitmap, and the bitmap length
 * (in bits) is in table->maxlen.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. Writing into
 * the file at position zero replaces the bitmap with the given input;
 * a write at a non-zero position ORs the parsed bits into the existing
 * bitmap.
 *
 * At most PAGE_SIZE - 1 bytes of input are parsed per call; when the
 * input is longer, parsing stops before an entry that may be cut off so
 * that it can be picked up by a following call.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, -EINVAL for a negative or out-of-range bit number or a
 * descending range, or the error reported by proc_get_long().
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/* Terminators accepted after the first and the second number of an entry */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to do: no bitmap, no room, or a read past position 0. */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *p = buffer;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		/* Parse into a scratch bitmap; the real one changes only on success. */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap)
			return -ENOMEM;
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A lone number is the range val_a-val_a. */
			val_b = val_a;
			/* Step over the terminator that ended the number. */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				if (left) {
					p++;
					left--;
				}
			}

			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			proc_skip_char(&p, &left, '\n');
		}
		/* The bytes beyond this pass's window count as unconsumed. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;
		bool first = 1;

		/* Emit each run of set bits as "a" or "a-b", comma separated. */
		while (left) {
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first)
				proc_put_char(&buffer, &left, ',');
			proc_put_long(&buffer, &left, bit_a, false);
			if (bit_a != bit_b) {
				proc_put_char(&buffer, &left, '-');
				proc_put_long(&buffer, &left, bit_b, false);
			}

			first = 0; bit_b++;
		}
		proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/* Position 0 replaces the bitmap; later positions add to it. */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		*lenp -= left;
		*ppos += *lenp;
	}

	/* tmp_bitmap is still NULL here on the read path. */
	bitmap_free(tmp_bitmap);
	return err;
}
1498
1499 #else /* CONFIG_PROC_SYSCTL */
1500
1501 int proc_dostring(struct ctl_table *table, int write,
1502                   void *buffer, size_t *lenp, loff_t *ppos)
1503 {
1504         return -ENOSYS;
1505 }
1506
1507 int proc_dobool(struct ctl_table *table, int write,
1508                 void *buffer, size_t *lenp, loff_t *ppos)
1509 {
1510         return -ENOSYS;
1511 }
1512
1513 int proc_dointvec(struct ctl_table *table, int write,
1514                   void *buffer, size_t *lenp, loff_t *ppos)
1515 {
1516         return -ENOSYS;
1517 }
1518
1519 int proc_douintvec(struct ctl_table *table, int write,
1520                   void *buffer, size_t *lenp, loff_t *ppos)
1521 {
1522         return -ENOSYS;
1523 }
1524
1525 int proc_dointvec_minmax(struct ctl_table *table, int write,
1526                     void *buffer, size_t *lenp, loff_t *ppos)
1527 {
1528         return -ENOSYS;
1529 }
1530
1531 int proc_douintvec_minmax(struct ctl_table *table, int write,
1532                           void *buffer, size_t *lenp, loff_t *ppos)
1533 {
1534         return -ENOSYS;
1535 }
1536
1537 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1538                         void *buffer, size_t *lenp, loff_t *ppos)
1539 {
1540         return -ENOSYS;
1541 }
1542
1543 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1544                     void *buffer, size_t *lenp, loff_t *ppos)
1545 {
1546         return -ENOSYS;
1547 }
1548
1549 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1550                                     void *buffer, size_t *lenp, loff_t *ppos)
1551 {
1552         return -ENOSYS;
1553 }
1554
1555 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1556                     void *buffer, size_t *lenp, loff_t *ppos)
1557 {
1558         return -ENOSYS;
1559 }
1560
1561 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1562                              void *buffer, size_t *lenp, loff_t *ppos)
1563 {
1564         return -ENOSYS;
1565 }
1566
1567 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1568                     void *buffer, size_t *lenp, loff_t *ppos)
1569 {
1570         return -ENOSYS;
1571 }
1572
1573 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1574                                       void *buffer, size_t *lenp, loff_t *ppos)
1575 {
1576         return -ENOSYS;
1577 }
1578
1579 int proc_do_large_bitmap(struct ctl_table *table, int write,
1580                          void *buffer, size_t *lenp, loff_t *ppos)
1581 {
1582         return -ENOSYS;
1583 }
1584
1585 #endif /* CONFIG_PROC_SYSCTL */
1586
1587 #if defined(CONFIG_SYSCTL)
1588 int proc_do_static_key(struct ctl_table *table, int write,
1589                        void *buffer, size_t *lenp, loff_t *ppos)
1590 {
1591         struct static_key *key = (struct static_key *)table->data;
1592         static DEFINE_MUTEX(static_key_mutex);
1593         int val, ret;
1594         struct ctl_table tmp = {
1595                 .data   = &val,
1596                 .maxlen = sizeof(val),
1597                 .mode   = table->mode,
1598                 .extra1 = SYSCTL_ZERO,
1599                 .extra2 = SYSCTL_ONE,
1600         };
1601
1602         if (write && !capable(CAP_SYS_ADMIN))
1603                 return -EPERM;
1604
1605         mutex_lock(&static_key_mutex);
1606         val = static_key_enabled(key);
1607         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1608         if (write && !ret) {
1609                 if (val)
1610                         static_key_enable(key);
1611                 else
1612                         static_key_disable(key);
1613         }
1614         mutex_unlock(&static_key_mutex);
1615         return ret;
1616 }
1617
1618 static struct ctl_table kern_table[] = {
1619         {
1620                 .procname       = "panic",
1621                 .data           = &panic_timeout,
1622                 .maxlen         = sizeof(int),
1623                 .mode           = 0644,
1624                 .proc_handler   = proc_dointvec,
1625         },
1626 #ifdef CONFIG_PROC_SYSCTL
1627         {
1628                 .procname       = "tainted",
1629                 .maxlen         = sizeof(long),
1630                 .mode           = 0644,
1631                 .proc_handler   = proc_taint,
1632         },
1633         {
1634                 .procname       = "sysctl_writes_strict",
1635                 .data           = &sysctl_writes_strict,
1636                 .maxlen         = sizeof(int),
1637                 .mode           = 0644,
1638                 .proc_handler   = proc_dointvec_minmax,
1639                 .extra1         = SYSCTL_NEG_ONE,
1640                 .extra2         = SYSCTL_ONE,
1641         },
1642 #endif
1643         {
1644                 .procname       = "print-fatal-signals",
1645                 .data           = &print_fatal_signals,
1646                 .maxlen         = sizeof(int),
1647                 .mode           = 0644,
1648                 .proc_handler   = proc_dointvec,
1649         },
1650 #ifdef CONFIG_SPARC
1651         {
1652                 .procname       = "reboot-cmd",
1653                 .data           = reboot_command,
1654                 .maxlen         = 256,
1655                 .mode           = 0644,
1656                 .proc_handler   = proc_dostring,
1657         },
1658         {
1659                 .procname       = "stop-a",
1660                 .data           = &stop_a_enabled,
1661                 .maxlen         = sizeof (int),
1662                 .mode           = 0644,
1663                 .proc_handler   = proc_dointvec,
1664         },
1665         {
1666                 .procname       = "scons-poweroff",
1667                 .data           = &scons_pwroff,
1668                 .maxlen         = sizeof (int),
1669                 .mode           = 0644,
1670                 .proc_handler   = proc_dointvec,
1671         },
1672 #endif
1673 #ifdef CONFIG_SPARC64
1674         {
1675                 .procname       = "tsb-ratio",
1676                 .data           = &sysctl_tsb_ratio,
1677                 .maxlen         = sizeof (int),
1678                 .mode           = 0644,
1679                 .proc_handler   = proc_dointvec,
1680         },
1681 #endif
1682 #ifdef CONFIG_PARISC
1683         {
1684                 .procname       = "soft-power",
1685                 .data           = &pwrsw_enabled,
1686                 .maxlen         = sizeof (int),
1687                 .mode           = 0644,
1688                 .proc_handler   = proc_dointvec,
1689         },
1690 #endif
1691 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1692         {
1693                 .procname       = "unaligned-trap",
1694                 .data           = &unaligned_enabled,
1695                 .maxlen         = sizeof (int),
1696                 .mode           = 0644,
1697                 .proc_handler   = proc_dointvec,
1698         },
1699 #endif
1700 #ifdef CONFIG_STACK_TRACER
1701         {
1702                 .procname       = "stack_tracer_enabled",
1703                 .data           = &stack_tracer_enabled,
1704                 .maxlen         = sizeof(int),
1705                 .mode           = 0644,
1706                 .proc_handler   = stack_trace_sysctl,
1707         },
1708 #endif
1709 #ifdef CONFIG_TRACING
1710         {
1711                 .procname       = "ftrace_dump_on_oops",
1712                 .data           = &ftrace_dump_on_oops,
1713                 .maxlen         = sizeof(int),
1714                 .mode           = 0644,
1715                 .proc_handler   = proc_dointvec,
1716         },
1717         {
1718                 .procname       = "traceoff_on_warning",
1719                 .data           = &__disable_trace_on_warning,
1720                 .maxlen         = sizeof(__disable_trace_on_warning),
1721                 .mode           = 0644,
1722                 .proc_handler   = proc_dointvec,
1723         },
1724         {
1725                 .procname       = "tracepoint_printk",
1726                 .data           = &tracepoint_printk,
1727                 .maxlen         = sizeof(tracepoint_printk),
1728                 .mode           = 0644,
1729                 .proc_handler   = tracepoint_printk_sysctl,
1730         },
1731 #endif
1732 #ifdef CONFIG_MODULES
1733         {
1734                 .procname       = "modprobe",
1735                 .data           = &modprobe_path,
1736                 .maxlen         = KMOD_PATH_LEN,
1737                 .mode           = 0644,
1738                 .proc_handler   = proc_dostring,
1739         },
1740         {
1741                 .procname       = "modules_disabled",
1742                 .data           = &modules_disabled,
1743                 .maxlen         = sizeof(int),
1744                 .mode           = 0644,
1745                 /* only handle a transition from default "0" to "1" */
1746                 .proc_handler   = proc_dointvec_minmax,
1747                 .extra1         = SYSCTL_ONE,
1748                 .extra2         = SYSCTL_ONE,
1749         },
1750 #endif
1751 #ifdef CONFIG_UEVENT_HELPER
1752         {
1753                 .procname       = "hotplug",
1754                 .data           = &uevent_helper,
1755                 .maxlen         = UEVENT_HELPER_PATH_LEN,
1756                 .mode           = 0644,
1757                 .proc_handler   = proc_dostring,
1758         },
1759 #endif
1760 #ifdef CONFIG_MAGIC_SYSRQ
1761         {
1762                 .procname       = "sysrq",
1763                 .data           = NULL,
1764                 .maxlen         = sizeof (int),
1765                 .mode           = 0644,
1766                 .proc_handler   = sysrq_sysctl_handler,
1767         },
1768 #endif
1769 #ifdef CONFIG_PROC_SYSCTL
1770         {
1771                 .procname       = "cad_pid",
1772                 .data           = NULL,
1773                 .maxlen         = sizeof (int),
1774                 .mode           = 0600,
1775                 .proc_handler   = proc_do_cad_pid,
1776         },
1777 #endif
1778         {
1779                 .procname       = "threads-max",
1780                 .data           = NULL,
1781                 .maxlen         = sizeof(int),
1782                 .mode           = 0644,
1783                 .proc_handler   = sysctl_max_threads,
1784         },
1785         {
1786                 .procname       = "usermodehelper",
1787                 .mode           = 0555,
1788                 .child          = usermodehelper_table,
1789         },
1790         {
1791                 .procname       = "overflowuid",
1792                 .data           = &overflowuid,
1793                 .maxlen         = sizeof(int),
1794                 .mode           = 0644,
1795                 .proc_handler   = proc_dointvec_minmax,
1796                 .extra1         = SYSCTL_ZERO,
1797                 .extra2         = SYSCTL_MAXOLDUID,
1798         },
1799         {
1800                 .procname       = "overflowgid",
1801                 .data           = &overflowgid,
1802                 .maxlen         = sizeof(int),
1803                 .mode           = 0644,
1804                 .proc_handler   = proc_dointvec_minmax,
1805                 .extra1         = SYSCTL_ZERO,
1806                 .extra2         = SYSCTL_MAXOLDUID,
1807         },
1808 #ifdef CONFIG_S390
1809         {
1810                 .procname       = "userprocess_debug",
1811                 .data           = &show_unhandled_signals,
1812                 .maxlen         = sizeof(int),
1813                 .mode           = 0644,
1814                 .proc_handler   = proc_dointvec,
1815         },
1816 #endif
1817         {
1818                 .procname       = "pid_max",
1819                 .data           = &pid_max,
1820                 .maxlen         = sizeof (int),
1821                 .mode           = 0644,
1822                 .proc_handler   = proc_dointvec_minmax,
1823                 .extra1         = &pid_max_min,
1824                 .extra2         = &pid_max_max,
1825         },
1826         {
1827                 .procname       = "panic_on_oops",
1828                 .data           = &panic_on_oops,
1829                 .maxlen         = sizeof(int),
1830                 .mode           = 0644,
1831                 .proc_handler   = proc_dointvec,
1832         },
1833         {
1834                 .procname       = "panic_print",
1835                 .data           = &panic_print,
1836                 .maxlen         = sizeof(unsigned long),
1837                 .mode           = 0644,
1838                 .proc_handler   = proc_doulongvec_minmax,
1839         },
1840         {
1841                 .procname       = "ngroups_max",
1842                 .data           = (void *)&ngroups_max,
1843                 .maxlen         = sizeof (int),
1844                 .mode           = 0444,
1845                 .proc_handler   = proc_dointvec,
1846         },
1847         {
1848                 .procname       = "cap_last_cap",
1849                 .data           = (void *)&cap_last_cap,
1850                 .maxlen         = sizeof(int),
1851                 .mode           = 0444,
1852                 .proc_handler   = proc_dointvec,
1853         },
1854 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1855         {
1856                 .procname       = "unknown_nmi_panic",
1857                 .data           = &unknown_nmi_panic,
1858                 .maxlen         = sizeof (int),
1859                 .mode           = 0644,
1860                 .proc_handler   = proc_dointvec,
1861         },
1862 #endif
1863
1864 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1865         defined(CONFIG_DEBUG_STACKOVERFLOW)
1866         {
1867                 .procname       = "panic_on_stackoverflow",
1868                 .data           = &sysctl_panic_on_stackoverflow,
1869                 .maxlen         = sizeof(int),
1870                 .mode           = 0644,
1871                 .proc_handler   = proc_dointvec,
1872         },
1873 #endif
1874 #if defined(CONFIG_X86)
1875         {
1876                 .procname       = "panic_on_unrecovered_nmi",
1877                 .data           = &panic_on_unrecovered_nmi,
1878                 .maxlen         = sizeof(int),
1879                 .mode           = 0644,
1880                 .proc_handler   = proc_dointvec,
1881         },
1882         {
1883                 .procname       = "panic_on_io_nmi",
1884                 .data           = &panic_on_io_nmi,
1885                 .maxlen         = sizeof(int),
1886                 .mode           = 0644,
1887                 .proc_handler   = proc_dointvec,
1888         },
1889         {
1890                 .procname       = "bootloader_type",
1891                 .data           = &bootloader_type,
1892                 .maxlen         = sizeof (int),
1893                 .mode           = 0444,
1894                 .proc_handler   = proc_dointvec,
1895         },
1896         {
1897                 .procname       = "bootloader_version",
1898                 .data           = &bootloader_version,
1899                 .maxlen         = sizeof (int),
1900                 .mode           = 0444,
1901                 .proc_handler   = proc_dointvec,
1902         },
1903         {
1904                 .procname       = "io_delay_type",
1905                 .data           = &io_delay_type,
1906                 .maxlen         = sizeof(int),
1907                 .mode           = 0644,
1908                 .proc_handler   = proc_dointvec,
1909         },
1910 #endif
1911 #if defined(CONFIG_MMU)
1912         {
1913                 .procname       = "randomize_va_space",
1914                 .data           = &randomize_va_space,
1915                 .maxlen         = sizeof(int),
1916                 .mode           = 0644,
1917                 .proc_handler   = proc_dointvec,
1918         },
1919 #endif
1920 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1921         {
1922                 .procname       = "spin_retry",
1923                 .data           = &spin_retry,
1924                 .maxlen         = sizeof (int),
1925                 .mode           = 0644,
1926                 .proc_handler   = proc_dointvec,
1927         },
1928 #endif
1929 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1930         {
1931                 .procname       = "acpi_video_flags",
1932                 .data           = &acpi_realmode_flags,
1933                 .maxlen         = sizeof (unsigned long),
1934                 .mode           = 0644,
1935                 .proc_handler   = proc_doulongvec_minmax,
1936         },
1937 #endif
1938 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1939         {
1940                 .procname       = "ignore-unaligned-usertrap",
1941                 .data           = &no_unaligned_warning,
1942                 .maxlen         = sizeof (int),
1943                 .mode           = 0644,
1944                 .proc_handler   = proc_dointvec,
1945         },
1946 #endif
1947 #ifdef CONFIG_IA64
1948         {
1949                 .procname       = "unaligned-dump-stack",
1950                 .data           = &unaligned_dump_stack,
1951                 .maxlen         = sizeof (int),
1952                 .mode           = 0644,
1953                 .proc_handler   = proc_dointvec,
1954         },
1955 #endif
1956 #ifdef CONFIG_RT_MUTEXES
1957         {
1958                 .procname       = "max_lock_depth",
1959                 .data           = &max_lock_depth,
1960                 .maxlen         = sizeof(int),
1961                 .mode           = 0644,
1962                 .proc_handler   = proc_dointvec,
1963         },
1964 #endif
1965 #ifdef CONFIG_KEYS
1966         {
1967                 .procname       = "keys",
1968                 .mode           = 0555,
1969                 .child          = key_sysctls,
1970         },
1971 #endif
1972 #ifdef CONFIG_PERF_EVENTS
1973         /*
1974          * User-space scripts rely on the existence of this file
1975          * as a feature check for perf_events being enabled.
1976          *
1977          * So it's an ABI, do not remove!
1978          */
1979         {
1980                 .procname       = "perf_event_paranoid",
1981                 .data           = &sysctl_perf_event_paranoid,
1982                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1983                 .mode           = 0644,
1984                 .proc_handler   = proc_dointvec,
1985         },
1986         {
1987                 .procname       = "perf_event_mlock_kb",
1988                 .data           = &sysctl_perf_event_mlock,
1989                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1990                 .mode           = 0644,
1991                 .proc_handler   = proc_dointvec,
1992         },
1993         {
1994                 .procname       = "perf_event_max_sample_rate",
1995                 .data           = &sysctl_perf_event_sample_rate,
1996                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1997                 .mode           = 0644,
1998                 .proc_handler   = perf_proc_update_handler,
1999                 .extra1         = SYSCTL_ONE,
2000         },
2001         {
2002                 .procname       = "perf_cpu_time_max_percent",
2003                 .data           = &sysctl_perf_cpu_time_max_percent,
2004                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
2005                 .mode           = 0644,
2006                 .proc_handler   = perf_cpu_time_max_percent_handler,
2007                 .extra1         = SYSCTL_ZERO,
2008                 .extra2         = SYSCTL_ONE_HUNDRED,
2009         },
2010         {
2011                 .procname       = "perf_event_max_stack",
2012                 .data           = &sysctl_perf_event_max_stack,
2013                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
2014                 .mode           = 0644,
2015                 .proc_handler   = perf_event_max_stack_handler,
2016                 .extra1         = SYSCTL_ZERO,
2017                 .extra2         = (void *)&six_hundred_forty_kb,
2018         },
2019         {
2020                 .procname       = "perf_event_max_contexts_per_stack",
2021                 .data           = &sysctl_perf_event_max_contexts_per_stack,
2022                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
2023                 .mode           = 0644,
2024                 .proc_handler   = perf_event_max_stack_handler,
2025                 .extra1         = SYSCTL_ZERO,
2026                 .extra2         = SYSCTL_ONE_THOUSAND,
2027         },
2028 #endif
2029         {
2030                 .procname       = "panic_on_warn",
2031                 .data           = &panic_on_warn,
2032                 .maxlen         = sizeof(int),
2033                 .mode           = 0644,
2034                 .proc_handler   = proc_dointvec_minmax,
2035                 .extra1         = SYSCTL_ZERO,
2036                 .extra2         = SYSCTL_ONE,
2037         },
2038 #ifdef CONFIG_TREE_RCU
2039         {
2040                 .procname       = "panic_on_rcu_stall",
2041                 .data           = &sysctl_panic_on_rcu_stall,
2042                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
2043                 .mode           = 0644,
2044                 .proc_handler   = proc_dointvec_minmax,
2045                 .extra1         = SYSCTL_ZERO,
2046                 .extra2         = SYSCTL_ONE,
2047         },
2048         {
2049                 .procname       = "max_rcu_stall_to_panic",
2050                 .data           = &sysctl_max_rcu_stall_to_panic,
2051                 .maxlen         = sizeof(sysctl_max_rcu_stall_to_panic),
2052                 .mode           = 0644,
2053                 .proc_handler   = proc_dointvec_minmax,
2054                 .extra1         = SYSCTL_ONE,
2055                 .extra2         = SYSCTL_INT_MAX,
2056         },
2057 #endif
2058         { }
2059 };
2060
/*
 * vm_table - sysctl entries for the "vm" base (see the
 * DECLARE_SYSCTL_BASE(vm, vm_table) line later in this file).
 *
 * Each entry gives the file name (.procname), the variable backing it
 * (.data, with its size in .maxlen), the access mode (.mode) and the
 * handler invoked on access (.proc_handler).  Where present, .extra1 and
 * .extra2 are presumably the lower and upper bounds applied by the
 * handler -- TODO confirm per handler, the handlers are defined elsewhere.
 *
 * Several entries only exist under CONFIG_NUMA, CONFIG_MMU, CONFIG_SMP or
 * architecture-specific options.  The table ends with an empty { } entry.
 */
2061 static struct ctl_table vm_table[] = {
2062         {
2063                 .procname       = "overcommit_memory",
2064                 .data           = &sysctl_overcommit_memory,
2065                 .maxlen         = sizeof(sysctl_overcommit_memory),
2066                 .mode           = 0644,
2067                 .proc_handler   = overcommit_policy_handler,
2068                 .extra1         = SYSCTL_ZERO,
2069                 .extra2         = SYSCTL_TWO,
2070         },
2071         {
2072                 .procname       = "overcommit_ratio",
2073                 .data           = &sysctl_overcommit_ratio,
2074                 .maxlen         = sizeof(sysctl_overcommit_ratio),
2075                 .mode           = 0644,
2076                 .proc_handler   = overcommit_ratio_handler,
2077         },
2078         {
2079                 .procname       = "overcommit_kbytes",
2080                 .data           = &sysctl_overcommit_kbytes,
2081                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
2082                 .mode           = 0644,
2083                 .proc_handler   = overcommit_kbytes_handler,
2084         },
2085         {
2086                 .procname       = "page-cluster",
2087                 .data           = &page_cluster,
2088                 .maxlen         = sizeof(int),
2089                 .mode           = 0644,
2090                 .proc_handler   = proc_dointvec_minmax,
2091                 .extra1         = SYSCTL_ZERO,
2092                 .extra2         = (void *)&page_cluster_max,
2093         },
2094         {
2095                 .procname       = "dirtytime_expire_seconds",
2096                 .data           = &dirtytime_expire_interval,
2097                 .maxlen         = sizeof(dirtytime_expire_interval),
2098                 .mode           = 0644,
2099                 .proc_handler   = dirtytime_interval_handler,
2100                 .extra1         = SYSCTL_ZERO,
2101         },
2102         {
2103                 .procname       = "swappiness",
2104                 .data           = &vm_swappiness,
2105                 .maxlen         = sizeof(vm_swappiness),
2106                 .mode           = 0644,
2107                 .proc_handler   = proc_dointvec_minmax,
2108                 .extra1         = SYSCTL_ZERO,
2109                 .extra2         = SYSCTL_TWO_HUNDRED,
2110         },
2111 #ifdef CONFIG_NUMA
2112         {
2113                 .procname       = "numa_stat",
2114                 .data           = &sysctl_vm_numa_stat,
2115                 .maxlen         = sizeof(int),
2116                 .mode           = 0644,
2117                 .proc_handler   = sysctl_vm_numa_stat_handler,
2118                 .extra1         = SYSCTL_ZERO,
2119                 .extra2         = SYSCTL_ONE,
2120         },
2121 #endif
2122         {
2123                 .procname       = "lowmem_reserve_ratio",
2124                 .data           = &sysctl_lowmem_reserve_ratio,
2125                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
2126                 .mode           = 0644,
2127                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
2128         },
             /* Mode 0200: only the owner write bit is set, no read bits. */
2129         {
2130                 .procname       = "drop_caches",
2131                 .data           = &sysctl_drop_caches,
2132                 .maxlen         = sizeof(int),
2133                 .mode           = 0200,
2134                 .proc_handler   = drop_caches_sysctl_handler,
2135                 .extra1         = SYSCTL_ONE,
2136                 .extra2         = SYSCTL_FOUR,
2137         },
2138         {
2139                 .procname       = "min_free_kbytes",
2140                 .data           = &min_free_kbytes,
2141                 .maxlen         = sizeof(min_free_kbytes),
2142                 .mode           = 0644,
2143                 .proc_handler   = min_free_kbytes_sysctl_handler,
2144                 .extra1         = SYSCTL_ZERO,
2145         },
2146         {
2147                 .procname       = "watermark_boost_factor",
2148                 .data           = &watermark_boost_factor,
2149                 .maxlen         = sizeof(watermark_boost_factor),
2150                 .mode           = 0644,
2151                 .proc_handler   = proc_dointvec_minmax,
2152                 .extra1         = SYSCTL_ZERO,
2153         },
2154         {
2155                 .procname       = "watermark_scale_factor",
2156                 .data           = &watermark_scale_factor,
2157                 .maxlen         = sizeof(watermark_scale_factor),
2158                 .mode           = 0644,
2159                 .proc_handler   = watermark_scale_factor_sysctl_handler,
2160                 .extra1         = SYSCTL_ONE,
2161                 .extra2         = SYSCTL_THREE_THOUSAND,
2162         },
2163         {
2164                 .procname       = "percpu_pagelist_high_fraction",
2165                 .data           = &percpu_pagelist_high_fraction,
2166                 .maxlen         = sizeof(percpu_pagelist_high_fraction),
2167                 .mode           = 0644,
2168                 .proc_handler   = percpu_pagelist_high_fraction_sysctl_handler,
2169                 .extra1         = SYSCTL_ZERO,
2170         },
2171         {
2172                 .procname       = "page_lock_unfairness",
2173                 .data           = &sysctl_page_lock_unfairness,
2174                 .maxlen         = sizeof(sysctl_page_lock_unfairness),
2175                 .mode           = 0644,
2176                 .proc_handler   = proc_dointvec_minmax,
2177                 .extra1         = SYSCTL_ZERO,
2178         },
2179 #ifdef CONFIG_MMU
             /* With CONFIG_MMU: max_map_count; without it: nr_trim_pages. */
2180         {
2181                 .procname       = "max_map_count",
2182                 .data           = &sysctl_max_map_count,
2183                 .maxlen         = sizeof(sysctl_max_map_count),
2184                 .mode           = 0644,
2185                 .proc_handler   = proc_dointvec_minmax,
2186                 .extra1         = SYSCTL_ZERO,
2187         },
2188 #else
2189         {
2190                 .procname       = "nr_trim_pages",
2191                 .data           = &sysctl_nr_trim_pages,
2192                 .maxlen         = sizeof(sysctl_nr_trim_pages),
2193                 .mode           = 0644,
2194                 .proc_handler   = proc_dointvec_minmax,
2195                 .extra1         = SYSCTL_ZERO,
2196         },
2197 #endif
2198         {
2199                 .procname       = "vfs_cache_pressure",
2200                 .data           = &sysctl_vfs_cache_pressure,
2201                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
2202                 .mode           = 0644,
2203                 .proc_handler   = proc_dointvec_minmax,
2204                 .extra1         = SYSCTL_ZERO,
2205         },
2206 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2207     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2208         {
2209                 .procname       = "legacy_va_layout",
2210                 .data           = &sysctl_legacy_va_layout,
2211                 .maxlen         = sizeof(sysctl_legacy_va_layout),
2212                 .mode           = 0644,
2213                 .proc_handler   = proc_dointvec_minmax,
2214                 .extra1         = SYSCTL_ZERO,
2215         },
2216 #endif
2217 #ifdef CONFIG_NUMA
             /* File is named zone_reclaim_mode but is backed by node_reclaim_mode. */
2218         {
2219                 .procname       = "zone_reclaim_mode",
2220                 .data           = &node_reclaim_mode,
2221                 .maxlen         = sizeof(node_reclaim_mode),
2222                 .mode           = 0644,
2223                 .proc_handler   = proc_dointvec_minmax,
2224                 .extra1         = SYSCTL_ZERO,
2225         },
2226         {
2227                 .procname       = "min_unmapped_ratio",
2228                 .data           = &sysctl_min_unmapped_ratio,
2229                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
2230                 .mode           = 0644,
2231                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
2232                 .extra1         = SYSCTL_ZERO,
2233                 .extra2         = SYSCTL_ONE_HUNDRED,
2234         },
2235         {
2236                 .procname       = "min_slab_ratio",
2237                 .data           = &sysctl_min_slab_ratio,
2238                 .maxlen         = sizeof(sysctl_min_slab_ratio),
2239                 .mode           = 0644,
2240                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
2241                 .extra1         = SYSCTL_ZERO,
2242                 .extra2         = SYSCTL_ONE_HUNDRED,
2243         },
2244 #endif
2245 #ifdef CONFIG_SMP
2246         {
2247                 .procname       = "stat_interval",
2248                 .data           = &sysctl_stat_interval,
2249                 .maxlen         = sizeof(sysctl_stat_interval),
2250                 .mode           = 0644,
2251                 .proc_handler   = proc_dointvec_jiffies,
2252         },
             /* No backing variable here: .data is NULL and .maxlen is 0. */
2253         {
2254                 .procname       = "stat_refresh",
2255                 .data           = NULL,
2256                 .maxlen         = 0,
2257                 .mode           = 0600,
2258                 .proc_handler   = vmstat_refresh,
2259         },
2260 #endif
2261 #ifdef CONFIG_MMU
2262         {
2263                 .procname       = "mmap_min_addr",
2264                 .data           = &dac_mmap_min_addr,
2265                 .maxlen         = sizeof(unsigned long),
2266                 .mode           = 0644,
2267                 .proc_handler   = mmap_min_addr_handler,
2268         },
2269 #endif
2270 #ifdef CONFIG_NUMA
2271         {
2272                 .procname       = "numa_zonelist_order",
2273                 .data           = &numa_zonelist_order,
2274                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
2275                 .mode           = 0644,
2276                 .proc_handler   = numa_zonelist_order_handler,
2277         },
2278 #endif
2279 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2280    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
             /*
              * Backing variable depends on the architecture: vdso32_enabled
              * when CONFIG_X86_32 is set, vdso_enabled otherwise.
              *
              * NOTE(review): .extra1 is set, yet the handler is plain
              * proc_dointvec rather than proc_dointvec_minmax -- confirm
              * whether the lower bound is actually enforced.
              */
2281         {
2282                 .procname       = "vdso_enabled",
2283 #ifdef CONFIG_X86_32
2284                 .data           = &vdso32_enabled,
2285                 .maxlen         = sizeof(vdso32_enabled),
2286 #else
2287                 .data           = &vdso_enabled,
2288                 .maxlen         = sizeof(vdso_enabled),
2289 #endif
2290                 .mode           = 0644,
2291                 .proc_handler   = proc_dointvec,
2292                 .extra1         = SYSCTL_ZERO,
2293         },
2294 #endif
2295         {
2296                 .procname       = "user_reserve_kbytes",
2297                 .data           = &sysctl_user_reserve_kbytes,
2298                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
2299                 .mode           = 0644,
2300                 .proc_handler   = proc_doulongvec_minmax,
2301         },
2302         {
2303                 .procname       = "admin_reserve_kbytes",
2304                 .data           = &sysctl_admin_reserve_kbytes,
2305                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
2306                 .mode           = 0644,
2307                 .proc_handler   = proc_doulongvec_minmax,
2308         },
2309 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
             /*
              * Mode 0600 (owner only).  The (void *) casts on the bounds
              * presumably drop a const qualifier -- confirm against the
              * declarations of mmap_rnd_bits_min/max.
              */
2310         {
2311                 .procname       = "mmap_rnd_bits",
2312                 .data           = &mmap_rnd_bits,
2313                 .maxlen         = sizeof(mmap_rnd_bits),
2314                 .mode           = 0600,
2315                 .proc_handler   = proc_dointvec_minmax,
2316                 .extra1         = (void *)&mmap_rnd_bits_min,
2317                 .extra2         = (void *)&mmap_rnd_bits_max,
2318         },
2319 #endif
2320 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2321         {
2322                 .procname       = "mmap_rnd_compat_bits",
2323                 .data           = &mmap_rnd_compat_bits,
2324                 .maxlen         = sizeof(mmap_rnd_compat_bits),
2325                 .mode           = 0600,
2326                 .proc_handler   = proc_dointvec_minmax,
2327                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
2328                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
2329         },
2330 #endif
             /* Empty entry terminating the table. */
2331         { }
2332 };
2333
/*
 * debug_table - sysctl entries for the "debug" base (see the
 * DECLARE_SYSCTL_BASE(debug, debug_table) line later in this file).
 *
 * The only entry, "exception-trace", exposes show_unhandled_signals as an
 * int through proc_dointvec and is compiled in only when
 * CONFIG_SYSCTL_EXCEPTION_TRACE is set; otherwise the table holds just the
 * empty terminating entry.
 */
2334 static struct ctl_table debug_table[] = {
2335 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2336         {
2337                 .procname       = "exception-trace",
2338                 .data           = &show_unhandled_signals,
2339                 .maxlen         = sizeof(int),
2340                 .mode           = 0644,
2341                 .proc_handler   = proc_dointvec
2342         },
2343 #endif
2344         { }
2345 };
2346
/*
 * dev_table - sysctl table for the "dev" base.  It contains only the empty
 * terminating entry, so this file contributes no files of its own here.
 * Presumably it exists so the base is present for other code to register
 * under -- TODO confirm against the users of the "dev" base.
 */
2347 static struct ctl_table dev_table[] = {
2348         { }
2349 };
2350
/*
 * Pair each base name (kernel, vm, debug, dev) with its table.  The same
 * names are passed to register_sysctl_base() in sysctl_init_bases().
 * The macro is defined outside this file; presumably it emits the parent
 * table object that register_sysctl_base() registers -- TODO confirm.
 */
2351 DECLARE_SYSCTL_BASE(kernel, kern_table);
2352 DECLARE_SYSCTL_BASE(vm, vm_table);
2353 DECLARE_SYSCTL_BASE(debug, debug_table);
2354 DECLARE_SYSCTL_BASE(dev, dev_table);
2355
/*
 * sysctl_init_bases - register the base sysctl tables of this file.
 *
 * Registers the kernel, vm, debug and dev bases, in that order, via
 * register_sysctl_base().  No result of those registrations is examined
 * here; the function unconditionally returns 0.
 *
 * Marked __init, so it is intended to be called during boot only.
 */
2356 int __init sysctl_init_bases(void)
2357 {
2358         register_sysctl_base(kernel);
2359         register_sysctl_base(vm);
2360         register_sysctl_base(debug);
2361         register_sysctl_base(dev);
2362
2363         return 0;
2364 }
2365 #endif /* CONFIG_SYSCTL */
2366 /*
2367  * No sense putting this after each symbol definition, twice,
2368  * exception granted :-)
2369  */
/*
 * Exports of the proc_do* handlers, gathered in one place instead of
 * following each definition ("twice" presumably refers to handlers that
 * have more than one definition in this file -- TODO confirm).
 *
 * The list sits after the CONFIG_SYSCTL #endif, so it is not guarded by
 * that option.  All symbols use EXPORT_SYMBOL except
 * proc_douintvec_minmax, which is exported with EXPORT_SYMBOL_GPL.
 */
2370 EXPORT_SYMBOL(proc_dobool);
2371 EXPORT_SYMBOL(proc_dointvec);
2372 EXPORT_SYMBOL(proc_douintvec);
2373 EXPORT_SYMBOL(proc_dointvec_jiffies);
2374 EXPORT_SYMBOL(proc_dointvec_minmax);
2375 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
2376 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2377 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2378 EXPORT_SYMBOL(proc_dostring);
2379 EXPORT_SYMBOL(proc_doulongvec_minmax);
2380 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2381 EXPORT_SYMBOL(proc_do_large_bitmap);