genirq: Respect IRQCHIP_SKIP_SET_WAKE in irq_chip_set_wake_parent()
[platform/kernel/linux-rpi.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69 #include <linux/pipe_fs_i.h>
70
71 #include <linux/uaccess.h>
72 #include <asm/processor.h>
73
74 #ifdef CONFIG_X86
75 #include <asm/nmi.h>
76 #include <asm/stacktrace.h>
77 #include <asm/io.h>
78 #endif
79 #ifdef CONFIG_SPARC
80 #include <asm/setup.h>
81 #endif
82 #ifdef CONFIG_BSD_PROCESS_ACCT
83 #include <linux/acct.h>
84 #endif
85 #ifdef CONFIG_RT_MUTEXES
86 #include <linux/rtmutex.h>
87 #endif
88 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
89 #include <linux/lockdep.h>
90 #endif
91 #ifdef CONFIG_CHR_DEV_SG
92 #include <scsi/sg.h>
93 #endif
94
95 #ifdef CONFIG_LOCKUP_DETECTOR
96 #include <linux/nmi.h>
97 #endif
98
99 #if defined(CONFIG_SYSCTL)
100
101 /*
 * External variables not in a header file.
 *
 * They are only declared here; their definitions are outside this file
 * section.  Several of them are wired into kern_table further down:
 * core_uses_pid, core_pattern, core_pipe_limit, pid_max and
 * latencytop_enabled are used as .data, while pid_max_min/pid_max_max are
 * the .extra1/.extra2 of the "pid_max" entry.
 */
102 extern int suid_dumpable;
103 #ifdef CONFIG_COREDUMP
104 extern int core_uses_pid;
105 extern char core_pattern[];
106 extern unsigned int core_pipe_limit;
107 #endif
108 extern int pid_max;
109 extern int pid_max_min, pid_max_max;
110 extern int percpu_pagelist_fraction;
111 extern int latencytop_enabled;
112 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
/* Only declared on kernels built without CONFIG_MMU. */
113 #ifndef CONFIG_MMU
114 extern int sysctl_nr_trim_pages;
115 #endif
116
117 /*
 * Constants used for minimum and maximum.
 *
 * Table entries reference their limits by address (.extra1 = &zero,
 * .extra2 = &one, ...), so every bound is kept in a named static object.
 * Objects without an initializer (zero, minolduid) have static storage and
 * therefore start out as 0.
 */
118 #ifdef CONFIG_LOCKUP_DETECTOR
119 static int sixty = 60;
120 #endif
121
122 static int __maybe_unused neg_one = -1;	/* .extra1 of "sysctl_writes_strict" */
123
124 static int zero;
125 static int __maybe_unused one = 1;
126 static int __maybe_unused two = 2;	/* .extra2 of "kptr_restrict" */
127 static int __maybe_unused four = 4;
128 static unsigned long one_ul = 1;
129 static unsigned long long_max = LONG_MAX;
130 static int one_hundred = 100;
131 static int one_thousand = 1000;
132 #ifdef CONFIG_PRINTK
133 static int ten_thousand = 10000;	/* .extra2 of "printk_delay" */
134 #endif
135 #ifdef CONFIG_PERF_EVENTS
136 static int six_hundred_forty_kb = 640 * 1024;
137 #endif
138
139 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
140 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
141
142 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
143 static int maxolduid = 65535;
144 static int minolduid;
145
/*
 * Values published through the "ngroups_max" and "cap_last_cap" entries of
 * kern_table, both with mode 0444.  cap_last_cap is const; the table entry
 * casts its address to void * to store it in .data.
 */
146 static int ngroups_max = NGROUPS_MAX;
147 static const int cap_last_cap = CAP_LAST_CAP;
148
149 /*
150  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
151  * and hung_task_check_interval_secs
152  */
153 #ifdef CONFIG_DETECT_HUNG_TASK
154 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
155 #endif
156
157 #ifdef CONFIG_INOTIFY_USER
158 #include <linux/inotify.h>
159 #endif
160 #ifdef CONFIG_SPARC
161 #endif
162
/*
 * Architecture/config specific variables, declared only when the matching
 * symbol is defined.  pwrsw_enabled backs the "soft-power" entry and
 * unaligned_enabled backs the "unaligned-trap" entry of kern_table; the
 * users of the other two are outside this file section.
 */
163 #ifdef __hppa__
164 extern int pwrsw_enabled;
165 #endif
166
167 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
168 extern int unaligned_enabled;
169 #endif
170
171 #ifdef CONFIG_IA64
172 extern int unaligned_dump_stack;
173 #endif
174
175 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
176 extern int no_unaligned_warning;
177 #endif
178
179 #ifdef CONFIG_PROC_SYSCTL
180
181 /**
182  * enum sysctl_writes_mode - supported sysctl write modes
183  *
184  * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
185  *      to be written, and multiple writes on the same sysctl file descriptor
186  *      will rewrite the sysctl value, regardless of file position. No warning
187  *      is issued when the initial position is not 0.
188  * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
189  *      not 0.
190  * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
191  *      file position 0 and the value must be fully contained in the buffer
192  *      sent to the write syscall. If dealing with strings respect the file
193  *      position, but restrict this to the max length of the buffer, anything
194  *      past the max length will be ignored. Multiple writes will append
195  *      to the buffer.
196  *
197  * These write modes control how current file position affects the behavior of
198  * updating sysctl values through the proc interface on each write.
199  */
200 enum sysctl_writes_mode {
201         SYSCTL_WRITES_LEGACY            = -1,
202         SYSCTL_WRITES_WARN              = 0,
203         SYSCTL_WRITES_STRICT            = 1,
204 };
205
/*
 * Currently selected write mode; starts out as SYSCTL_WRITES_STRICT.
 * It is the .data of the "sysctl_writes_strict" entry in kern_table, whose
 * .extra1/.extra2 are &neg_one and &one, i.e. the enum's smallest and
 * largest values.
 */
206 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
207
/*
 * Handlers referenced by the "cad_pid" and "tainted" entries of kern_table.
 * Only the prototypes appear here; the bodies are outside this file section.
 */
208 static int proc_do_cad_pid(struct ctl_table *table, int write,
209                   void __user *buffer, size_t *lenp, loff_t *ppos);
210 static int proc_taint(struct ctl_table *table, int write,
211                                void __user *buffer, size_t *lenp, loff_t *ppos);
212 #endif
213
/*
 * Prototypes of file-local proc handlers, declared ahead of the tables that
 * name them in .proc_handler.  All share the handler signature
 * (table, write, buffer, lenp, ppos).  Being static, they have to be
 * defined in this translation unit; the definitions are outside this file
 * section.
 */
214 #ifdef CONFIG_PRINTK
/* Used by the "dmesg_restrict" and "kptr_restrict" entries. */
215 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
216                                 void __user *buffer, size_t *lenp, loff_t *ppos);
217 #endif
218
219 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
220                 void __user *buffer, size_t *lenp, loff_t *ppos);
221 #ifdef CONFIG_COREDUMP
/* Used by the "core_pattern" entry. */
222 static int proc_dostring_coredump(struct ctl_table *table, int write,
223                 void __user *buffer, size_t *lenp, loff_t *ppos);
224 #endif
225 static int proc_dopipe_max_size(struct ctl_table *table, int write,
226                 void __user *buffer, size_t *lenp, loff_t *ppos);
227
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * Value behind the "sysrq" sysctl entry, initialised from
 * CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE.
 * Note: the sysrq code uses its own private copy.
 */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;

/*
 * proc handler of the "sysrq" entry.
 *
 * The actual read or write is delegated to proc_dointvec().  If that call
 * reports an error, the error is returned as is and nothing else happens.
 * After a successful write, the value now held in __sysrq_enabled is handed
 * to sysrq_toggle_support().
 *
 * Returns 0 on success, otherwise the non-zero result of proc_dointvec().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp,
                                loff_t *ppos)
{
        int ret = proc_dointvec(table, write, buffer, lenp, ppos);

        /* Only propagate the new setting when a write actually succeeded. */
        if (!ret && write)
                sysrq_toggle_support(__sysrq_enabled);

        return ret;
}

#endif
249
/*
 * Forward declarations of the sysctl tables.  The five static tables are the
 * children of sysctl_base_table, which is defined before any of them; only
 * kern_table's definition falls inside this file section.
 */
250 static struct ctl_table kern_table[];
251 static struct ctl_table vm_table[];
252 static struct ctl_table fs_table[];
253 static struct ctl_table debug_table[];
254 static struct ctl_table dev_table[];
/* Tables provided by other files; random_table is the "random" child of kern_table. */
255 extern struct ctl_table random_table[];
256 #ifdef CONFIG_EPOLL
257 extern struct ctl_table epoll_table[];
258 #endif
259
260 #ifdef CONFIG_FW_LOADER_USER_HELPER
/* Child of the "firmware_config" entry in kern_table. */
261 extern struct ctl_table firmware_config_table[];
262 #endif
263
264 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Defined here with external linkage; zero until something sets it. */
265 int sysctl_legacy_va_layout;
266 #endif
267
268 /* The default sysctl tables: */
269
270 static struct ctl_table sysctl_base_table[] = {
271         {
272                 .procname       = "kernel",
273                 .mode           = 0555,
274                 .child          = kern_table,
275         },
276         {
277                 .procname       = "vm",
278                 .mode           = 0555,
279                 .child          = vm_table,
280         },
281         {
282                 .procname       = "fs",
283                 .mode           = 0555,
284                 .child          = fs_table,
285         },
286         {
287                 .procname       = "debug",
288                 .mode           = 0555,
289                 .child          = debug_table,
290         },
291         {
292                 .procname       = "dev",
293                 .mode           = 0555,
294                 .child          = dev_table,
295         },
296         { }
297 };
298
/*
 * Bounds used as .extra1/.extra2 by the scheduler entries of kern_table:
 * the *_sched_granularity_ns pair limits both "sched_min_granularity_ns" and
 * "sched_latency_ns", the *_wakeup_granularity_ns pair limits
 * "sched_wakeup_granularity_ns", and the *_sched_tunable_scaling pair limits
 * "sched_tunable_scaling".
 */
299 #ifdef CONFIG_SCHED_DEBUG
300 static int min_sched_granularity_ns = 100000;           /* 100 usecs */
301 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
302 static int min_wakeup_granularity_ns;                   /* 0 usecs */
303 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
304 #ifdef CONFIG_SMP
305 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
306 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
307 #endif /* CONFIG_SMP */
308 #endif /* CONFIG_SCHED_DEBUG */
309
/* Limits 0 and 1000; the entry that uses them is outside this file section. */
310 #ifdef CONFIG_COMPACTION
311 static int min_extfrag_threshold;
312 static int max_extfrag_threshold = 1000;
313 #endif
314
315 static struct ctl_table kern_table[] = {
316         {
317                 .procname       = "sched_child_runs_first",
318                 .data           = &sysctl_sched_child_runs_first,
319                 .maxlen         = sizeof(unsigned int),
320                 .mode           = 0644,
321                 .proc_handler   = proc_dointvec,
322         },
323 #ifdef CONFIG_SCHED_DEBUG
324         {
325                 .procname       = "sched_min_granularity_ns",
326                 .data           = &sysctl_sched_min_granularity,
327                 .maxlen         = sizeof(unsigned int),
328                 .mode           = 0644,
329                 .proc_handler   = sched_proc_update_handler,
330                 .extra1         = &min_sched_granularity_ns,
331                 .extra2         = &max_sched_granularity_ns,
332         },
333         {
334                 .procname       = "sched_latency_ns",
335                 .data           = &sysctl_sched_latency,
336                 .maxlen         = sizeof(unsigned int),
337                 .mode           = 0644,
338                 .proc_handler   = sched_proc_update_handler,
339                 .extra1         = &min_sched_granularity_ns,
340                 .extra2         = &max_sched_granularity_ns,
341         },
342         {
343                 .procname       = "sched_wakeup_granularity_ns",
344                 .data           = &sysctl_sched_wakeup_granularity,
345                 .maxlen         = sizeof(unsigned int),
346                 .mode           = 0644,
347                 .proc_handler   = sched_proc_update_handler,
348                 .extra1         = &min_wakeup_granularity_ns,
349                 .extra2         = &max_wakeup_granularity_ns,
350         },
351 #ifdef CONFIG_SMP
352         {
353                 .procname       = "sched_tunable_scaling",
354                 .data           = &sysctl_sched_tunable_scaling,
355                 .maxlen         = sizeof(enum sched_tunable_scaling),
356                 .mode           = 0644,
357                 .proc_handler   = sched_proc_update_handler,
358                 .extra1         = &min_sched_tunable_scaling,
359                 .extra2         = &max_sched_tunable_scaling,
360         },
361         {
362                 .procname       = "sched_migration_cost_ns",
363                 .data           = &sysctl_sched_migration_cost,
364                 .maxlen         = sizeof(unsigned int),
365                 .mode           = 0644,
366                 .proc_handler   = proc_dointvec,
367         },
368         {
369                 .procname       = "sched_nr_migrate",
370                 .data           = &sysctl_sched_nr_migrate,
371                 .maxlen         = sizeof(unsigned int),
372                 .mode           = 0644,
373                 .proc_handler   = proc_dointvec,
374         },
375 #ifdef CONFIG_SCHEDSTATS
376         {
377                 .procname       = "sched_schedstats",
378                 .data           = NULL,
379                 .maxlen         = sizeof(unsigned int),
380                 .mode           = 0644,
381                 .proc_handler   = sysctl_schedstats,
382                 .extra1         = &zero,
383                 .extra2         = &one,
384         },
385 #endif /* CONFIG_SCHEDSTATS */
386 #endif /* CONFIG_SMP */
387 #ifdef CONFIG_NUMA_BALANCING
388         {
389                 .procname       = "numa_balancing_scan_delay_ms",
390                 .data           = &sysctl_numa_balancing_scan_delay,
391                 .maxlen         = sizeof(unsigned int),
392                 .mode           = 0644,
393                 .proc_handler   = proc_dointvec,
394         },
395         {
396                 .procname       = "numa_balancing_scan_period_min_ms",
397                 .data           = &sysctl_numa_balancing_scan_period_min,
398                 .maxlen         = sizeof(unsigned int),
399                 .mode           = 0644,
400                 .proc_handler   = proc_dointvec,
401         },
402         {
403                 .procname       = "numa_balancing_scan_period_max_ms",
404                 .data           = &sysctl_numa_balancing_scan_period_max,
405                 .maxlen         = sizeof(unsigned int),
406                 .mode           = 0644,
407                 .proc_handler   = proc_dointvec,
408         },
409         {
410                 .procname       = "numa_balancing_scan_size_mb",
411                 .data           = &sysctl_numa_balancing_scan_size,
412                 .maxlen         = sizeof(unsigned int),
413                 .mode           = 0644,
414                 .proc_handler   = proc_dointvec_minmax,
415                 .extra1         = &one,
416         },
417         {
418                 .procname       = "numa_balancing",
419                 .data           = NULL, /* filled in by handler */
420                 .maxlen         = sizeof(unsigned int),
421                 .mode           = 0644,
422                 .proc_handler   = sysctl_numa_balancing,
423                 .extra1         = &zero,
424                 .extra2         = &one,
425         },
426 #endif /* CONFIG_NUMA_BALANCING */
427 #endif /* CONFIG_SCHED_DEBUG */
428         {
429                 .procname       = "sched_rt_period_us",
430                 .data           = &sysctl_sched_rt_period,
431                 .maxlen         = sizeof(unsigned int),
432                 .mode           = 0644,
433                 .proc_handler   = sched_rt_handler,
434         },
435         {
436                 .procname       = "sched_rt_runtime_us",
437                 .data           = &sysctl_sched_rt_runtime,
438                 .maxlen         = sizeof(int),
439                 .mode           = 0644,
440                 .proc_handler   = sched_rt_handler,
441         },
442         {
443                 .procname       = "sched_rr_timeslice_ms",
444                 .data           = &sysctl_sched_rr_timeslice,
445                 .maxlen         = sizeof(int),
446                 .mode           = 0644,
447                 .proc_handler   = sched_rr_handler,
448         },
449 #ifdef CONFIG_SCHED_AUTOGROUP
450         {
451                 .procname       = "sched_autogroup_enabled",
452                 .data           = &sysctl_sched_autogroup_enabled,
453                 .maxlen         = sizeof(unsigned int),
454                 .mode           = 0644,
455                 .proc_handler   = proc_dointvec_minmax,
456                 .extra1         = &zero,
457                 .extra2         = &one,
458         },
459 #endif
460 #ifdef CONFIG_CFS_BANDWIDTH
461         {
462                 .procname       = "sched_cfs_bandwidth_slice_us",
463                 .data           = &sysctl_sched_cfs_bandwidth_slice,
464                 .maxlen         = sizeof(unsigned int),
465                 .mode           = 0644,
466                 .proc_handler   = proc_dointvec_minmax,
467                 .extra1         = &one,
468         },
469 #endif
470 #ifdef CONFIG_PROVE_LOCKING
471         {
472                 .procname       = "prove_locking",
473                 .data           = &prove_locking,
474                 .maxlen         = sizeof(int),
475                 .mode           = 0644,
476                 .proc_handler   = proc_dointvec,
477         },
478 #endif
479 #ifdef CONFIG_LOCK_STAT
480         {
481                 .procname       = "lock_stat",
482                 .data           = &lock_stat,
483                 .maxlen         = sizeof(int),
484                 .mode           = 0644,
485                 .proc_handler   = proc_dointvec,
486         },
487 #endif
488         {
489                 .procname       = "panic",
490                 .data           = &panic_timeout,
491                 .maxlen         = sizeof(int),
492                 .mode           = 0644,
493                 .proc_handler   = proc_dointvec,
494         },
495 #ifdef CONFIG_COREDUMP
496         {
497                 .procname       = "core_uses_pid",
498                 .data           = &core_uses_pid,
499                 .maxlen         = sizeof(int),
500                 .mode           = 0644,
501                 .proc_handler   = proc_dointvec,
502         },
503         {
504                 .procname       = "core_pattern",
505                 .data           = core_pattern,
506                 .maxlen         = CORENAME_MAX_SIZE,
507                 .mode           = 0644,
508                 .proc_handler   = proc_dostring_coredump,
509         },
510         {
511                 .procname       = "core_pipe_limit",
512                 .data           = &core_pipe_limit,
513                 .maxlen         = sizeof(unsigned int),
514                 .mode           = 0644,
515                 .proc_handler   = proc_dointvec,
516         },
517 #endif
518 #ifdef CONFIG_PROC_SYSCTL
519         {
520                 .procname       = "tainted",
521                 .maxlen         = sizeof(long),
522                 .mode           = 0644,
523                 .proc_handler   = proc_taint,
524         },
525         {
526                 .procname       = "sysctl_writes_strict",
527                 .data           = &sysctl_writes_strict,
528                 .maxlen         = sizeof(int),
529                 .mode           = 0644,
530                 .proc_handler   = proc_dointvec_minmax,
531                 .extra1         = &neg_one,
532                 .extra2         = &one,
533         },
534 #endif
535 #ifdef CONFIG_LATENCYTOP
536         {
537                 .procname       = "latencytop",
538                 .data           = &latencytop_enabled,
539                 .maxlen         = sizeof(int),
540                 .mode           = 0644,
541                 .proc_handler   = sysctl_latencytop,
542         },
543 #endif
544 #ifdef CONFIG_BLK_DEV_INITRD
545         {
546                 .procname       = "real-root-dev",
547                 .data           = &real_root_dev,
548                 .maxlen         = sizeof(int),
549                 .mode           = 0644,
550                 .proc_handler   = proc_dointvec,
551         },
552 #endif
553         {
554                 .procname       = "print-fatal-signals",
555                 .data           = &print_fatal_signals,
556                 .maxlen         = sizeof(int),
557                 .mode           = 0644,
558                 .proc_handler   = proc_dointvec,
559         },
560 #ifdef CONFIG_SPARC
561         {
562                 .procname       = "reboot-cmd",
563                 .data           = reboot_command,
564                 .maxlen         = 256,
565                 .mode           = 0644,
566                 .proc_handler   = proc_dostring,
567         },
568         {
569                 .procname       = "stop-a",
570                 .data           = &stop_a_enabled,
571                 .maxlen         = sizeof (int),
572                 .mode           = 0644,
573                 .proc_handler   = proc_dointvec,
574         },
575         {
576                 .procname       = "scons-poweroff",
577                 .data           = &scons_pwroff,
578                 .maxlen         = sizeof (int),
579                 .mode           = 0644,
580                 .proc_handler   = proc_dointvec,
581         },
582 #endif
583 #ifdef CONFIG_SPARC64
584         {
585                 .procname       = "tsb-ratio",
586                 .data           = &sysctl_tsb_ratio,
587                 .maxlen         = sizeof (int),
588                 .mode           = 0644,
589                 .proc_handler   = proc_dointvec,
590         },
591 #endif
592 #ifdef __hppa__
593         {
594                 .procname       = "soft-power",
595                 .data           = &pwrsw_enabled,
596                 .maxlen         = sizeof (int),
597                 .mode           = 0644,
598                 .proc_handler   = proc_dointvec,
599         },
600 #endif
601 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
602         {
603                 .procname       = "unaligned-trap",
604                 .data           = &unaligned_enabled,
605                 .maxlen         = sizeof (int),
606                 .mode           = 0644,
607                 .proc_handler   = proc_dointvec,
608         },
609 #endif
610         {
611                 .procname       = "ctrl-alt-del",
612                 .data           = &C_A_D,
613                 .maxlen         = sizeof(int),
614                 .mode           = 0644,
615                 .proc_handler   = proc_dointvec,
616         },
617 #ifdef CONFIG_FUNCTION_TRACER
618         {
619                 .procname       = "ftrace_enabled",
620                 .data           = &ftrace_enabled,
621                 .maxlen         = sizeof(int),
622                 .mode           = 0644,
623                 .proc_handler   = ftrace_enable_sysctl,
624         },
625 #endif
626 #ifdef CONFIG_STACK_TRACER
627         {
628                 .procname       = "stack_tracer_enabled",
629                 .data           = &stack_tracer_enabled,
630                 .maxlen         = sizeof(int),
631                 .mode           = 0644,
632                 .proc_handler   = stack_trace_sysctl,
633         },
634 #endif
635 #ifdef CONFIG_TRACING
636         {
637                 .procname       = "ftrace_dump_on_oops",
638                 .data           = &ftrace_dump_on_oops,
639                 .maxlen         = sizeof(int),
640                 .mode           = 0644,
641                 .proc_handler   = proc_dointvec,
642         },
643         {
644                 .procname       = "traceoff_on_warning",
645                 .data           = &__disable_trace_on_warning,
646                 .maxlen         = sizeof(__disable_trace_on_warning),
647                 .mode           = 0644,
648                 .proc_handler   = proc_dointvec,
649         },
650         {
651                 .procname       = "tracepoint_printk",
652                 .data           = &tracepoint_printk,
653                 .maxlen         = sizeof(tracepoint_printk),
654                 .mode           = 0644,
655                 .proc_handler   = tracepoint_printk_sysctl,
656         },
657 #endif
658 #ifdef CONFIG_KEXEC_CORE
659         {
660                 .procname       = "kexec_load_disabled",
661                 .data           = &kexec_load_disabled,
662                 .maxlen         = sizeof(int),
663                 .mode           = 0644,
664                 /* only handle a transition from default "0" to "1" */
665                 .proc_handler   = proc_dointvec_minmax,
666                 .extra1         = &one,
667                 .extra2         = &one,
668         },
669 #endif
670 #ifdef CONFIG_MODULES
671         {
672                 .procname       = "modprobe",
673                 .data           = &modprobe_path,
674                 .maxlen         = KMOD_PATH_LEN,
675                 .mode           = 0644,
676                 .proc_handler   = proc_dostring,
677         },
678         {
679                 .procname       = "modules_disabled",
680                 .data           = &modules_disabled,
681                 .maxlen         = sizeof(int),
682                 .mode           = 0644,
683                 /* only handle a transition from default "0" to "1" */
684                 .proc_handler   = proc_dointvec_minmax,
685                 .extra1         = &one,
686                 .extra2         = &one,
687         },
688 #endif
689 #ifdef CONFIG_UEVENT_HELPER
690         {
691                 .procname       = "hotplug",
692                 .data           = &uevent_helper,
693                 .maxlen         = UEVENT_HELPER_PATH_LEN,
694                 .mode           = 0644,
695                 .proc_handler   = proc_dostring,
696         },
697 #endif
698 #ifdef CONFIG_CHR_DEV_SG
699         {
700                 .procname       = "sg-big-buff",
701                 .data           = &sg_big_buff,
702                 .maxlen         = sizeof (int),
703                 .mode           = 0444,
704                 .proc_handler   = proc_dointvec,
705         },
706 #endif
707 #ifdef CONFIG_BSD_PROCESS_ACCT
708         {
709                 .procname       = "acct",
710                 .data           = &acct_parm,
711                 .maxlen         = 3*sizeof(int),
712                 .mode           = 0644,
713                 .proc_handler   = proc_dointvec,
714         },
715 #endif
716 #ifdef CONFIG_MAGIC_SYSRQ
717         {
718                 .procname       = "sysrq",
719                 .data           = &__sysrq_enabled,
720                 .maxlen         = sizeof (int),
721                 .mode           = 0644,
722                 .proc_handler   = sysrq_sysctl_handler,
723         },
724 #endif
725 #ifdef CONFIG_PROC_SYSCTL
726         {
727                 .procname       = "cad_pid",
728                 .data           = NULL,
729                 .maxlen         = sizeof (int),
730                 .mode           = 0600,
731                 .proc_handler   = proc_do_cad_pid,
732         },
733 #endif
734         {
735                 .procname       = "threads-max",
736                 .data           = NULL,
737                 .maxlen         = sizeof(int),
738                 .mode           = 0644,
739                 .proc_handler   = sysctl_max_threads,
740         },
741         {
742                 .procname       = "random",
743                 .mode           = 0555,
744                 .child          = random_table,
745         },
746         {
747                 .procname       = "usermodehelper",
748                 .mode           = 0555,
749                 .child          = usermodehelper_table,
750         },
751 #ifdef CONFIG_FW_LOADER_USER_HELPER
752         {
753                 .procname       = "firmware_config",
754                 .mode           = 0555,
755                 .child          = firmware_config_table,
756         },
757 #endif
758         {
759                 .procname       = "overflowuid",
760                 .data           = &overflowuid,
761                 .maxlen         = sizeof(int),
762                 .mode           = 0644,
763                 .proc_handler   = proc_dointvec_minmax,
764                 .extra1         = &minolduid,
765                 .extra2         = &maxolduid,
766         },
767         {
768                 .procname       = "overflowgid",
769                 .data           = &overflowgid,
770                 .maxlen         = sizeof(int),
771                 .mode           = 0644,
772                 .proc_handler   = proc_dointvec_minmax,
773                 .extra1         = &minolduid,
774                 .extra2         = &maxolduid,
775         },
776 #ifdef CONFIG_S390
777 #ifdef CONFIG_MATHEMU
778         {
779                 .procname       = "ieee_emulation_warnings",
780                 .data           = &sysctl_ieee_emulation_warnings,
781                 .maxlen         = sizeof(int),
782                 .mode           = 0644,
783                 .proc_handler   = proc_dointvec,
784         },
785 #endif
786         {
787                 .procname       = "userprocess_debug",
788                 .data           = &show_unhandled_signals,
789                 .maxlen         = sizeof(int),
790                 .mode           = 0644,
791                 .proc_handler   = proc_dointvec,
792         },
793 #endif
794         {
795                 .procname       = "pid_max",
796                 .data           = &pid_max,
797                 .maxlen         = sizeof (int),
798                 .mode           = 0644,
799                 .proc_handler   = proc_dointvec_minmax,
800                 .extra1         = &pid_max_min,
801                 .extra2         = &pid_max_max,
802         },
803         {
804                 .procname       = "panic_on_oops",
805                 .data           = &panic_on_oops,
806                 .maxlen         = sizeof(int),
807                 .mode           = 0644,
808                 .proc_handler   = proc_dointvec,
809         },
810 #if defined CONFIG_PRINTK
811         {
812                 .procname       = "printk",
813                 .data           = &console_loglevel,
814                 .maxlen         = 4*sizeof(int),
815                 .mode           = 0644,
816                 .proc_handler   = proc_dointvec,
817         },
818         {
819                 .procname       = "printk_ratelimit",
820                 .data           = &printk_ratelimit_state.interval,
821                 .maxlen         = sizeof(int),
822                 .mode           = 0644,
823                 .proc_handler   = proc_dointvec_jiffies,
824         },
825         {
826                 .procname       = "printk_ratelimit_burst",
827                 .data           = &printk_ratelimit_state.burst,
828                 .maxlen         = sizeof(int),
829                 .mode           = 0644,
830                 .proc_handler   = proc_dointvec,
831         },
832         {
833                 .procname       = "printk_delay",
834                 .data           = &printk_delay_msec,
835                 .maxlen         = sizeof(int),
836                 .mode           = 0644,
837                 .proc_handler   = proc_dointvec_minmax,
838                 .extra1         = &zero,
839                 .extra2         = &ten_thousand,
840         },
841         {
842                 .procname       = "printk_devkmsg",
843                 .data           = devkmsg_log_str,
844                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
845                 .mode           = 0644,
846                 .proc_handler   = devkmsg_sysctl_set_loglvl,
847         },
848         {
849                 .procname       = "dmesg_restrict",
850                 .data           = &dmesg_restrict,
851                 .maxlen         = sizeof(int),
852                 .mode           = 0644,
853                 .proc_handler   = proc_dointvec_minmax_sysadmin,
854                 .extra1         = &zero,
855                 .extra2         = &one,
856         },
857         {
858                 .procname       = "kptr_restrict",
859                 .data           = &kptr_restrict,
860                 .maxlen         = sizeof(int),
861                 .mode           = 0644,
862                 .proc_handler   = proc_dointvec_minmax_sysadmin,
863                 .extra1         = &zero,
864                 .extra2         = &two,
865         },
866 #endif
867         {
868                 .procname       = "ngroups_max",
869                 .data           = &ngroups_max,
870                 .maxlen         = sizeof (int),
871                 .mode           = 0444,
872                 .proc_handler   = proc_dointvec,
873         },
874         {
875                 .procname       = "cap_last_cap",
876                 .data           = (void *)&cap_last_cap,
877                 .maxlen         = sizeof(int),
878                 .mode           = 0444,
879                 .proc_handler   = proc_dointvec,
880         },
881 #if defined(CONFIG_LOCKUP_DETECTOR)
882         {
883                 .procname       = "watchdog",
884                 .data           = &watchdog_user_enabled,
885                 .maxlen         = sizeof(int),
886                 .mode           = 0644,
887                 .proc_handler   = proc_watchdog,
888                 .extra1         = &zero,
889                 .extra2         = &one,
890         },
891         {
892                 .procname       = "watchdog_thresh",
893                 .data           = &watchdog_thresh,
894                 .maxlen         = sizeof(int),
895                 .mode           = 0644,
896                 .proc_handler   = proc_watchdog_thresh,
897                 .extra1         = &zero,
898                 .extra2         = &sixty,
899         },
900         {
901                 .procname       = "nmi_watchdog",
902                 .data           = &nmi_watchdog_user_enabled,
903                 .maxlen         = sizeof(int),
904                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
905                 .proc_handler   = proc_nmi_watchdog,
906                 .extra1         = &zero,
907                 .extra2         = &one,
908         },
909         {
910                 .procname       = "watchdog_cpumask",
911                 .data           = &watchdog_cpumask_bits,
912                 .maxlen         = NR_CPUS,
913                 .mode           = 0644,
914                 .proc_handler   = proc_watchdog_cpumask,
915         },
916 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
917         {
918                 .procname       = "soft_watchdog",
919                 .data           = &soft_watchdog_user_enabled,
920                 .maxlen         = sizeof(int),
921                 .mode           = 0644,
922                 .proc_handler   = proc_soft_watchdog,
923                 .extra1         = &zero,
924                 .extra2         = &one,
925         },
926         {
927                 .procname       = "softlockup_panic",
928                 .data           = &softlockup_panic,
929                 .maxlen         = sizeof(int),
930                 .mode           = 0644,
931                 .proc_handler   = proc_dointvec_minmax,
932                 .extra1         = &zero,
933                 .extra2         = &one,
934         },
935 #ifdef CONFIG_SMP
936         {
937                 .procname       = "softlockup_all_cpu_backtrace",
938                 .data           = &sysctl_softlockup_all_cpu_backtrace,
939                 .maxlen         = sizeof(int),
940                 .mode           = 0644,
941                 .proc_handler   = proc_dointvec_minmax,
942                 .extra1         = &zero,
943                 .extra2         = &one,
944         },
945 #endif /* CONFIG_SMP */
946 #endif
947 #ifdef CONFIG_HARDLOCKUP_DETECTOR
948         {
949                 .procname       = "hardlockup_panic",
950                 .data           = &hardlockup_panic,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec_minmax,
954                 .extra1         = &zero,
955                 .extra2         = &one,
956         },
957 #ifdef CONFIG_SMP
958         {
959                 .procname       = "hardlockup_all_cpu_backtrace",
960                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
961                 .maxlen         = sizeof(int),
962                 .mode           = 0644,
963                 .proc_handler   = proc_dointvec_minmax,
964                 .extra1         = &zero,
965                 .extra2         = &one,
966         },
967 #endif /* CONFIG_SMP */
968 #endif
969 #endif
970
971 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
972         {
973                 .procname       = "unknown_nmi_panic",
974                 .data           = &unknown_nmi_panic,
975                 .maxlen         = sizeof (int),
976                 .mode           = 0644,
977                 .proc_handler   = proc_dointvec,
978         },
979 #endif
980 #if defined(CONFIG_X86)
981         {
982                 .procname       = "panic_on_unrecovered_nmi",
983                 .data           = &panic_on_unrecovered_nmi,
984                 .maxlen         = sizeof(int),
985                 .mode           = 0644,
986                 .proc_handler   = proc_dointvec,
987         },
988         {
989                 .procname       = "panic_on_io_nmi",
990                 .data           = &panic_on_io_nmi,
991                 .maxlen         = sizeof(int),
992                 .mode           = 0644,
993                 .proc_handler   = proc_dointvec,
994         },
995 #ifdef CONFIG_DEBUG_STACKOVERFLOW
996         {
997                 .procname       = "panic_on_stackoverflow",
998                 .data           = &sysctl_panic_on_stackoverflow,
999                 .maxlen         = sizeof(int),
1000                 .mode           = 0644,
1001                 .proc_handler   = proc_dointvec,
1002         },
1003 #endif
1004         {
1005                 .procname       = "bootloader_type",
1006                 .data           = &bootloader_type,
1007                 .maxlen         = sizeof (int),
1008                 .mode           = 0444,
1009                 .proc_handler   = proc_dointvec,
1010         },
1011         {
1012                 .procname       = "bootloader_version",
1013                 .data           = &bootloader_version,
1014                 .maxlen         = sizeof (int),
1015                 .mode           = 0444,
1016                 .proc_handler   = proc_dointvec,
1017         },
1018         {
1019                 .procname       = "io_delay_type",
1020                 .data           = &io_delay_type,
1021                 .maxlen         = sizeof(int),
1022                 .mode           = 0644,
1023                 .proc_handler   = proc_dointvec,
1024         },
1025 #endif
1026 #if defined(CONFIG_MMU)
1027         {
1028                 .procname       = "randomize_va_space",
1029                 .data           = &randomize_va_space,
1030                 .maxlen         = sizeof(int),
1031                 .mode           = 0644,
1032                 .proc_handler   = proc_dointvec,
1033         },
1034 #endif
1035 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1036         {
1037                 .procname       = "spin_retry",
1038                 .data           = &spin_retry,
1039                 .maxlen         = sizeof (int),
1040                 .mode           = 0644,
1041                 .proc_handler   = proc_dointvec,
1042         },
1043 #endif
1044 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1045         {
1046                 .procname       = "acpi_video_flags",
1047                 .data           = &acpi_realmode_flags,
1048                 .maxlen         = sizeof (unsigned long),
1049                 .mode           = 0644,
1050                 .proc_handler   = proc_doulongvec_minmax,
1051         },
1052 #endif
1053 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1054         {
1055                 .procname       = "ignore-unaligned-usertrap",
1056                 .data           = &no_unaligned_warning,
1057                 .maxlen         = sizeof (int),
1058                 .mode           = 0644,
1059                 .proc_handler   = proc_dointvec,
1060         },
1061 #endif
1062 #ifdef CONFIG_IA64
1063         {
1064                 .procname       = "unaligned-dump-stack",
1065                 .data           = &unaligned_dump_stack,
1066                 .maxlen         = sizeof (int),
1067                 .mode           = 0644,
1068                 .proc_handler   = proc_dointvec,
1069         },
1070 #endif
1071 #ifdef CONFIG_DETECT_HUNG_TASK
1072         {
1073                 .procname       = "hung_task_panic",
1074                 .data           = &sysctl_hung_task_panic,
1075                 .maxlen         = sizeof(int),
1076                 .mode           = 0644,
1077                 .proc_handler   = proc_dointvec_minmax,
1078                 .extra1         = &zero,
1079                 .extra2         = &one,
1080         },
1081         {
1082                 .procname       = "hung_task_check_count",
1083                 .data           = &sysctl_hung_task_check_count,
1084                 .maxlen         = sizeof(int),
1085                 .mode           = 0644,
1086                 .proc_handler   = proc_dointvec_minmax,
1087                 .extra1         = &zero,
1088         },
1089         {
1090                 .procname       = "hung_task_timeout_secs",
1091                 .data           = &sysctl_hung_task_timeout_secs,
1092                 .maxlen         = sizeof(unsigned long),
1093                 .mode           = 0644,
1094                 .proc_handler   = proc_dohung_task_timeout_secs,
1095                 .extra2         = &hung_task_timeout_max,
1096         },
1097         {
1098                 .procname       = "hung_task_check_interval_secs",
1099                 .data           = &sysctl_hung_task_check_interval_secs,
1100                 .maxlen         = sizeof(unsigned long),
1101                 .mode           = 0644,
1102                 .proc_handler   = proc_dohung_task_timeout_secs,
1103                 .extra2         = &hung_task_timeout_max,
1104         },
1105         {
1106                 .procname       = "hung_task_warnings",
1107                 .data           = &sysctl_hung_task_warnings,
1108                 .maxlen         = sizeof(int),
1109                 .mode           = 0644,
1110                 .proc_handler   = proc_dointvec_minmax,
1111                 .extra1         = &neg_one,
1112         },
1113 #endif
1114 #ifdef CONFIG_RT_MUTEXES
1115         {
1116                 .procname       = "max_lock_depth",
1117                 .data           = &max_lock_depth,
1118                 .maxlen         = sizeof(int),
1119                 .mode           = 0644,
1120                 .proc_handler   = proc_dointvec,
1121         },
1122 #endif
1123         {
1124                 .procname       = "poweroff_cmd",
1125                 .data           = &poweroff_cmd,
1126                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1127                 .mode           = 0644,
1128                 .proc_handler   = proc_dostring,
1129         },
1130 #ifdef CONFIG_KEYS
1131         {
1132                 .procname       = "keys",
1133                 .mode           = 0555,
1134                 .child          = key_sysctls,
1135         },
1136 #endif
1137 #ifdef CONFIG_PERF_EVENTS
1138         /*
1139          * User-space scripts rely on the existence of this file
1140          * as a feature check for perf_events being enabled.
1141          *
1142          * So it's an ABI, do not remove!
1143          */
1144         {
1145                 .procname       = "perf_event_paranoid",
1146                 .data           = &sysctl_perf_event_paranoid,
1147                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1148                 .mode           = 0644,
1149                 .proc_handler   = proc_dointvec,
1150         },
1151         {
1152                 .procname       = "perf_event_mlock_kb",
1153                 .data           = &sysctl_perf_event_mlock,
1154                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1155                 .mode           = 0644,
1156                 .proc_handler   = proc_dointvec,
1157         },
1158         {
1159                 .procname       = "perf_event_max_sample_rate",
1160                 .data           = &sysctl_perf_event_sample_rate,
1161                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1162                 .mode           = 0644,
1163                 .proc_handler   = perf_proc_update_handler,
1164                 .extra1         = &one,
1165         },
1166         {
1167                 .procname       = "perf_cpu_time_max_percent",
1168                 .data           = &sysctl_perf_cpu_time_max_percent,
1169                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1170                 .mode           = 0644,
1171                 .proc_handler   = perf_cpu_time_max_percent_handler,
1172                 .extra1         = &zero,
1173                 .extra2         = &one_hundred,
1174         },
1175         {
1176                 .procname       = "perf_event_max_stack",
1177                 .data           = &sysctl_perf_event_max_stack,
1178                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
1179                 .mode           = 0644,
1180                 .proc_handler   = perf_event_max_stack_handler,
1181                 .extra1         = &zero,
1182                 .extra2         = &six_hundred_forty_kb,
1183         },
1184         {
1185                 .procname       = "perf_event_max_contexts_per_stack",
1186                 .data           = &sysctl_perf_event_max_contexts_per_stack,
1187                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1188                 .mode           = 0644,
1189                 .proc_handler   = perf_event_max_stack_handler,
1190                 .extra1         = &zero,
1191                 .extra2         = &one_thousand,
1192         },
1193 #endif
1194         {
1195                 .procname       = "panic_on_warn",
1196                 .data           = &panic_on_warn,
1197                 .maxlen         = sizeof(int),
1198                 .mode           = 0644,
1199                 .proc_handler   = proc_dointvec_minmax,
1200                 .extra1         = &zero,
1201                 .extra2         = &one,
1202         },
1203 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1204         {
1205                 .procname       = "timer_migration",
1206                 .data           = &sysctl_timer_migration,
1207                 .maxlen         = sizeof(unsigned int),
1208                 .mode           = 0644,
1209                 .proc_handler   = timer_migration_handler,
1210                 .extra1         = &zero,
1211                 .extra2         = &one,
1212         },
1213 #endif
1214 #ifdef CONFIG_BPF_SYSCALL
1215         {
1216                 .procname       = "unprivileged_bpf_disabled",
1217                 .data           = &sysctl_unprivileged_bpf_disabled,
1218                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1219                 .mode           = 0644,
1220                 /* only handle a transition from default "0" to "1" */
1221                 .proc_handler   = proc_dointvec_minmax,
1222                 .extra1         = &one,
1223                 .extra2         = &one,
1224         },
1225 #endif
1226 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1227         {
1228                 .procname       = "panic_on_rcu_stall",
1229                 .data           = &sysctl_panic_on_rcu_stall,
1230                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1231                 .mode           = 0644,
1232                 .proc_handler   = proc_dointvec_minmax,
1233                 .extra1         = &zero,
1234                 .extra2         = &one,
1235         },
1236 #endif
1237         { }
1238 };
1239
1240 static struct ctl_table vm_table[] = {
1241         {
1242                 .procname       = "overcommit_memory",
1243                 .data           = &sysctl_overcommit_memory,
1244                 .maxlen         = sizeof(sysctl_overcommit_memory),
1245                 .mode           = 0644,
1246                 .proc_handler   = proc_dointvec_minmax,
1247                 .extra1         = &zero,
1248                 .extra2         = &two,
1249         },
1250         {
1251                 .procname       = "panic_on_oom",
1252                 .data           = &sysctl_panic_on_oom,
1253                 .maxlen         = sizeof(sysctl_panic_on_oom),
1254                 .mode           = 0644,
1255                 .proc_handler   = proc_dointvec_minmax,
1256                 .extra1         = &zero,
1257                 .extra2         = &two,
1258         },
1259         {
1260                 .procname       = "oom_kill_allocating_task",
1261                 .data           = &sysctl_oom_kill_allocating_task,
1262                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1263                 .mode           = 0644,
1264                 .proc_handler   = proc_dointvec,
1265         },
1266         {
1267                 .procname       = "oom_dump_tasks",
1268                 .data           = &sysctl_oom_dump_tasks,
1269                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1270                 .mode           = 0644,
1271                 .proc_handler   = proc_dointvec,
1272         },
1273         {
1274                 .procname       = "overcommit_ratio",
1275                 .data           = &sysctl_overcommit_ratio,
1276                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1277                 .mode           = 0644,
1278                 .proc_handler   = overcommit_ratio_handler,
1279         },
1280         {
1281                 .procname       = "overcommit_kbytes",
1282                 .data           = &sysctl_overcommit_kbytes,
1283                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1284                 .mode           = 0644,
1285                 .proc_handler   = overcommit_kbytes_handler,
1286         },
1287         {
1288                 .procname       = "page-cluster", 
1289                 .data           = &page_cluster,
1290                 .maxlen         = sizeof(int),
1291                 .mode           = 0644,
1292                 .proc_handler   = proc_dointvec_minmax,
1293                 .extra1         = &zero,
1294         },
1295         {
1296                 .procname       = "dirty_background_ratio",
1297                 .data           = &dirty_background_ratio,
1298                 .maxlen         = sizeof(dirty_background_ratio),
1299                 .mode           = 0644,
1300                 .proc_handler   = dirty_background_ratio_handler,
1301                 .extra1         = &zero,
1302                 .extra2         = &one_hundred,
1303         },
1304         {
1305                 .procname       = "dirty_background_bytes",
1306                 .data           = &dirty_background_bytes,
1307                 .maxlen         = sizeof(dirty_background_bytes),
1308                 .mode           = 0644,
1309                 .proc_handler   = dirty_background_bytes_handler,
1310                 .extra1         = &one_ul,
1311         },
1312         {
1313                 .procname       = "dirty_ratio",
1314                 .data           = &vm_dirty_ratio,
1315                 .maxlen         = sizeof(vm_dirty_ratio),
1316                 .mode           = 0644,
1317                 .proc_handler   = dirty_ratio_handler,
1318                 .extra1         = &zero,
1319                 .extra2         = &one_hundred,
1320         },
1321         {
1322                 .procname       = "dirty_bytes",
1323                 .data           = &vm_dirty_bytes,
1324                 .maxlen         = sizeof(vm_dirty_bytes),
1325                 .mode           = 0644,
1326                 .proc_handler   = dirty_bytes_handler,
1327                 .extra1         = &dirty_bytes_min,
1328         },
1329         {
1330                 .procname       = "dirty_writeback_centisecs",
1331                 .data           = &dirty_writeback_interval,
1332                 .maxlen         = sizeof(dirty_writeback_interval),
1333                 .mode           = 0644,
1334                 .proc_handler   = dirty_writeback_centisecs_handler,
1335         },
1336         {
1337                 .procname       = "dirty_expire_centisecs",
1338                 .data           = &dirty_expire_interval,
1339                 .maxlen         = sizeof(dirty_expire_interval),
1340                 .mode           = 0644,
1341                 .proc_handler   = proc_dointvec_minmax,
1342                 .extra1         = &zero,
1343         },
1344         {
1345                 .procname       = "dirtytime_expire_seconds",
1346                 .data           = &dirtytime_expire_interval,
1347                 .maxlen         = sizeof(dirtytime_expire_interval),
1348                 .mode           = 0644,
1349                 .proc_handler   = dirtytime_interval_handler,
1350                 .extra1         = &zero,
1351         },
1352         {
1353                 .procname       = "swappiness",
1354                 .data           = &vm_swappiness,
1355                 .maxlen         = sizeof(vm_swappiness),
1356                 .mode           = 0644,
1357                 .proc_handler   = proc_dointvec_minmax,
1358                 .extra1         = &zero,
1359                 .extra2         = &one_hundred,
1360         },
1361 #ifdef CONFIG_HUGETLB_PAGE
1362         {
1363                 .procname       = "nr_hugepages",
1364                 .data           = NULL,
1365                 .maxlen         = sizeof(unsigned long),
1366                 .mode           = 0644,
1367                 .proc_handler   = hugetlb_sysctl_handler,
1368         },
1369 #ifdef CONFIG_NUMA
1370         {
1371                 .procname       = "nr_hugepages_mempolicy",
1372                 .data           = NULL,
1373                 .maxlen         = sizeof(unsigned long),
1374                 .mode           = 0644,
1375                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1376         },
1377         {
1378                 .procname               = "numa_stat",
1379                 .data                   = &sysctl_vm_numa_stat,
1380                 .maxlen                 = sizeof(int),
1381                 .mode                   = 0644,
1382                 .proc_handler   = sysctl_vm_numa_stat_handler,
1383                 .extra1                 = &zero,
1384                 .extra2                 = &one,
1385         },
1386 #endif
1387          {
1388                 .procname       = "hugetlb_shm_group",
1389                 .data           = &sysctl_hugetlb_shm_group,
1390                 .maxlen         = sizeof(gid_t),
1391                 .mode           = 0644,
1392                 .proc_handler   = proc_dointvec,
1393          },
1394         {
1395                 .procname       = "nr_overcommit_hugepages",
1396                 .data           = NULL,
1397                 .maxlen         = sizeof(unsigned long),
1398                 .mode           = 0644,
1399                 .proc_handler   = hugetlb_overcommit_handler,
1400         },
1401 #endif
1402         {
1403                 .procname       = "lowmem_reserve_ratio",
1404                 .data           = &sysctl_lowmem_reserve_ratio,
1405                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1406                 .mode           = 0644,
1407                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1408         },
1409         {
1410                 .procname       = "drop_caches",
1411                 .data           = &sysctl_drop_caches,
1412                 .maxlen         = sizeof(int),
1413                 .mode           = 0644,
1414                 .proc_handler   = drop_caches_sysctl_handler,
1415                 .extra1         = &one,
1416                 .extra2         = &four,
1417         },
1418 #ifdef CONFIG_COMPACTION
1419         {
1420                 .procname       = "compact_memory",
1421                 .data           = &sysctl_compact_memory,
1422                 .maxlen         = sizeof(int),
1423                 .mode           = 0200,
1424                 .proc_handler   = sysctl_compaction_handler,
1425         },
1426         {
1427                 .procname       = "extfrag_threshold",
1428                 .data           = &sysctl_extfrag_threshold,
1429                 .maxlen         = sizeof(int),
1430                 .mode           = 0644,
1431                 .proc_handler   = sysctl_extfrag_handler,
1432                 .extra1         = &min_extfrag_threshold,
1433                 .extra2         = &max_extfrag_threshold,
1434         },
1435         {
1436                 .procname       = "compact_unevictable_allowed",
1437                 .data           = &sysctl_compact_unevictable_allowed,
1438                 .maxlen         = sizeof(int),
1439                 .mode           = 0644,
1440                 .proc_handler   = proc_dointvec,
1441                 .extra1         = &zero,
1442                 .extra2         = &one,
1443         },
1444
1445 #endif /* CONFIG_COMPACTION */
1446         {
1447                 .procname       = "min_free_kbytes",
1448                 .data           = &min_free_kbytes,
1449                 .maxlen         = sizeof(min_free_kbytes),
1450                 .mode           = 0644,
1451                 .proc_handler   = min_free_kbytes_sysctl_handler,
1452                 .extra1         = &zero,
1453         },
1454         {
1455                 .procname       = "watermark_scale_factor",
1456                 .data           = &watermark_scale_factor,
1457                 .maxlen         = sizeof(watermark_scale_factor),
1458                 .mode           = 0644,
1459                 .proc_handler   = watermark_scale_factor_sysctl_handler,
1460                 .extra1         = &one,
1461                 .extra2         = &one_thousand,
1462         },
1463         {
1464                 .procname       = "percpu_pagelist_fraction",
1465                 .data           = &percpu_pagelist_fraction,
1466                 .maxlen         = sizeof(percpu_pagelist_fraction),
1467                 .mode           = 0644,
1468                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1469                 .extra1         = &zero,
1470         },
1471 #ifdef CONFIG_MMU
1472         {
1473                 .procname       = "max_map_count",
1474                 .data           = &sysctl_max_map_count,
1475                 .maxlen         = sizeof(sysctl_max_map_count),
1476                 .mode           = 0644,
1477                 .proc_handler   = proc_dointvec_minmax,
1478                 .extra1         = &zero,
1479         },
1480 #else
1481         {
1482                 .procname       = "nr_trim_pages",
1483                 .data           = &sysctl_nr_trim_pages,
1484                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1485                 .mode           = 0644,
1486                 .proc_handler   = proc_dointvec_minmax,
1487                 .extra1         = &zero,
1488         },
1489 #endif
1490         {
1491                 .procname       = "laptop_mode",
1492                 .data           = &laptop_mode,
1493                 .maxlen         = sizeof(laptop_mode),
1494                 .mode           = 0644,
1495                 .proc_handler   = proc_dointvec_jiffies,
1496         },
1497         {
1498                 .procname       = "block_dump",
1499                 .data           = &block_dump,
1500                 .maxlen         = sizeof(block_dump),
1501                 .mode           = 0644,
1502                 .proc_handler   = proc_dointvec,
1503                 .extra1         = &zero,
1504         },
1505         {
1506                 .procname       = "vfs_cache_pressure",
1507                 .data           = &sysctl_vfs_cache_pressure,
1508                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1509                 .mode           = 0644,
1510                 .proc_handler   = proc_dointvec,
1511                 .extra1         = &zero,
1512         },
1513 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1514         {
1515                 .procname       = "legacy_va_layout",
1516                 .data           = &sysctl_legacy_va_layout,
1517                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1518                 .mode           = 0644,
1519                 .proc_handler   = proc_dointvec,
1520                 .extra1         = &zero,
1521         },
1522 #endif
1523 #ifdef CONFIG_NUMA
1524         {
1525                 .procname       = "zone_reclaim_mode",
1526                 .data           = &node_reclaim_mode,
1527                 .maxlen         = sizeof(node_reclaim_mode),
1528                 .mode           = 0644,
1529                 .proc_handler   = proc_dointvec,
1530                 .extra1         = &zero,
1531         },
1532         {
1533                 .procname       = "min_unmapped_ratio",
1534                 .data           = &sysctl_min_unmapped_ratio,
1535                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1536                 .mode           = 0644,
1537                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1538                 .extra1         = &zero,
1539                 .extra2         = &one_hundred,
1540         },
1541         {
1542                 .procname       = "min_slab_ratio",
1543                 .data           = &sysctl_min_slab_ratio,
1544                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1545                 .mode           = 0644,
1546                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1547                 .extra1         = &zero,
1548                 .extra2         = &one_hundred,
1549         },
1550 #endif
1551 #ifdef CONFIG_SMP
1552         {
1553                 .procname       = "stat_interval",
1554                 .data           = &sysctl_stat_interval,
1555                 .maxlen         = sizeof(sysctl_stat_interval),
1556                 .mode           = 0644,
1557                 .proc_handler   = proc_dointvec_jiffies,
1558         },
1559         {
1560                 .procname       = "stat_refresh",
1561                 .data           = NULL,
1562                 .maxlen         = 0,
1563                 .mode           = 0600,
1564                 .proc_handler   = vmstat_refresh,
1565         },
1566 #endif
1567 #ifdef CONFIG_MMU
1568         {
1569                 .procname       = "mmap_min_addr",
1570                 .data           = &dac_mmap_min_addr,
1571                 .maxlen         = sizeof(unsigned long),
1572                 .mode           = 0644,
1573                 .proc_handler   = mmap_min_addr_handler,
1574         },
1575 #endif
1576 #ifdef CONFIG_NUMA
1577         {
1578                 .procname       = "numa_zonelist_order",
1579                 .data           = &numa_zonelist_order,
1580                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1581                 .mode           = 0644,
1582                 .proc_handler   = numa_zonelist_order_handler,
1583         },
1584 #endif
1585 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1586    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1587         {
1588                 .procname       = "vdso_enabled",
1589 #ifdef CONFIG_X86_32
1590                 .data           = &vdso32_enabled,
1591                 .maxlen         = sizeof(vdso32_enabled),
1592 #else
1593                 .data           = &vdso_enabled,
1594                 .maxlen         = sizeof(vdso_enabled),
1595 #endif
1596                 .mode           = 0644,
1597                 .proc_handler   = proc_dointvec,
1598                 .extra1         = &zero,
1599         },
1600 #endif
1601 #ifdef CONFIG_HIGHMEM
1602         {
1603                 .procname       = "highmem_is_dirtyable",
1604                 .data           = &vm_highmem_is_dirtyable,
1605                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1606                 .mode           = 0644,
1607                 .proc_handler   = proc_dointvec_minmax,
1608                 .extra1         = &zero,
1609                 .extra2         = &one,
1610         },
1611 #endif
1612 #ifdef CONFIG_MEMORY_FAILURE
1613         {
1614                 .procname       = "memory_failure_early_kill",
1615                 .data           = &sysctl_memory_failure_early_kill,
1616                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1617                 .mode           = 0644,
1618                 .proc_handler   = proc_dointvec_minmax,
1619                 .extra1         = &zero,
1620                 .extra2         = &one,
1621         },
1622         {
1623                 .procname       = "memory_failure_recovery",
1624                 .data           = &sysctl_memory_failure_recovery,
1625                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1626                 .mode           = 0644,
1627                 .proc_handler   = proc_dointvec_minmax,
1628                 .extra1         = &zero,
1629                 .extra2         = &one,
1630         },
1631 #endif
1632         {
1633                 .procname       = "user_reserve_kbytes",
1634                 .data           = &sysctl_user_reserve_kbytes,
1635                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1636                 .mode           = 0644,
1637                 .proc_handler   = proc_doulongvec_minmax,
1638         },
1639         {
1640                 .procname       = "admin_reserve_kbytes",
1641                 .data           = &sysctl_admin_reserve_kbytes,
1642                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1643                 .mode           = 0644,
1644                 .proc_handler   = proc_doulongvec_minmax,
1645         },
1646 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1647         {
1648                 .procname       = "mmap_rnd_bits",
1649                 .data           = &mmap_rnd_bits,
1650                 .maxlen         = sizeof(mmap_rnd_bits),
1651                 .mode           = 0600,
1652                 .proc_handler   = proc_dointvec_minmax,
1653                 .extra1         = (void *)&mmap_rnd_bits_min,
1654                 .extra2         = (void *)&mmap_rnd_bits_max,
1655         },
1656 #endif
1657 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1658         {
1659                 .procname       = "mmap_rnd_compat_bits",
1660                 .data           = &mmap_rnd_compat_bits,
1661                 .maxlen         = sizeof(mmap_rnd_compat_bits),
1662                 .mode           = 0600,
1663                 .proc_handler   = proc_dointvec_minmax,
1664                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
1665                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
1666         },
1667 #endif
1668         { }
1669 };
1670
1671 static struct ctl_table fs_table[] = {
1672         {
1673                 .procname       = "inode-nr",
1674                 .data           = &inodes_stat,
1675                 .maxlen         = 2*sizeof(long),
1676                 .mode           = 0444,
1677                 .proc_handler   = proc_nr_inodes,
1678         },
1679         {
1680                 .procname       = "inode-state",
1681                 .data           = &inodes_stat,
1682                 .maxlen         = 7*sizeof(long),
1683                 .mode           = 0444,
1684                 .proc_handler   = proc_nr_inodes,
1685         },
1686         {
1687                 .procname       = "file-nr",
1688                 .data           = &files_stat,
1689                 .maxlen         = sizeof(files_stat),
1690                 .mode           = 0444,
1691                 .proc_handler   = proc_nr_files,
1692         },
1693         {
1694                 .procname       = "file-max",
1695                 .data           = &files_stat.max_files,
1696                 .maxlen         = sizeof(files_stat.max_files),
1697                 .mode           = 0644,
1698                 .proc_handler   = proc_doulongvec_minmax,
1699                 .extra1         = &zero,
1700                 .extra2         = &long_max,
1701         },
1702         {
1703                 .procname       = "nr_open",
1704                 .data           = &sysctl_nr_open,
1705                 .maxlen         = sizeof(unsigned int),
1706                 .mode           = 0644,
1707                 .proc_handler   = proc_dointvec_minmax,
1708                 .extra1         = &sysctl_nr_open_min,
1709                 .extra2         = &sysctl_nr_open_max,
1710         },
1711         {
1712                 .procname       = "dentry-state",
1713                 .data           = &dentry_stat,
1714                 .maxlen         = 6*sizeof(long),
1715                 .mode           = 0444,
1716                 .proc_handler   = proc_nr_dentry,
1717         },
1718         {
1719                 .procname       = "overflowuid",
1720                 .data           = &fs_overflowuid,
1721                 .maxlen         = sizeof(int),
1722                 .mode           = 0644,
1723                 .proc_handler   = proc_dointvec_minmax,
1724                 .extra1         = &minolduid,
1725                 .extra2         = &maxolduid,
1726         },
1727         {
1728                 .procname       = "overflowgid",
1729                 .data           = &fs_overflowgid,
1730                 .maxlen         = sizeof(int),
1731                 .mode           = 0644,
1732                 .proc_handler   = proc_dointvec_minmax,
1733                 .extra1         = &minolduid,
1734                 .extra2         = &maxolduid,
1735         },
1736 #ifdef CONFIG_FILE_LOCKING
1737         {
1738                 .procname       = "leases-enable",
1739                 .data           = &leases_enable,
1740                 .maxlen         = sizeof(int),
1741                 .mode           = 0644,
1742                 .proc_handler   = proc_dointvec,
1743         },
1744 #endif
1745 #ifdef CONFIG_DNOTIFY
1746         {
1747                 .procname       = "dir-notify-enable",
1748                 .data           = &dir_notify_enable,
1749                 .maxlen         = sizeof(int),
1750                 .mode           = 0644,
1751                 .proc_handler   = proc_dointvec,
1752         },
1753 #endif
1754 #ifdef CONFIG_MMU
1755 #ifdef CONFIG_FILE_LOCKING
1756         {
1757                 .procname       = "lease-break-time",
1758                 .data           = &lease_break_time,
1759                 .maxlen         = sizeof(int),
1760                 .mode           = 0644,
1761                 .proc_handler   = proc_dointvec,
1762         },
1763 #endif
1764 #ifdef CONFIG_AIO
1765         {
1766                 .procname       = "aio-nr",
1767                 .data           = &aio_nr,
1768                 .maxlen         = sizeof(aio_nr),
1769                 .mode           = 0444,
1770                 .proc_handler   = proc_doulongvec_minmax,
1771         },
1772         {
1773                 .procname       = "aio-max-nr",
1774                 .data           = &aio_max_nr,
1775                 .maxlen         = sizeof(aio_max_nr),
1776                 .mode           = 0644,
1777                 .proc_handler   = proc_doulongvec_minmax,
1778         },
1779 #endif /* CONFIG_AIO */
1780 #ifdef CONFIG_INOTIFY_USER
1781         {
1782                 .procname       = "inotify",
1783                 .mode           = 0555,
1784                 .child          = inotify_table,
1785         },
1786 #endif  
1787 #ifdef CONFIG_EPOLL
1788         {
1789                 .procname       = "epoll",
1790                 .mode           = 0555,
1791                 .child          = epoll_table,
1792         },
1793 #endif
1794 #endif
1795         {
1796                 .procname       = "protected_symlinks",
1797                 .data           = &sysctl_protected_symlinks,
1798                 .maxlen         = sizeof(int),
1799                 .mode           = 0600,
1800                 .proc_handler   = proc_dointvec_minmax,
1801                 .extra1         = &zero,
1802                 .extra2         = &one,
1803         },
1804         {
1805                 .procname       = "protected_hardlinks",
1806                 .data           = &sysctl_protected_hardlinks,
1807                 .maxlen         = sizeof(int),
1808                 .mode           = 0600,
1809                 .proc_handler   = proc_dointvec_minmax,
1810                 .extra1         = &zero,
1811                 .extra2         = &one,
1812         },
1813         {
1814                 .procname       = "protected_fifos",
1815                 .data           = &sysctl_protected_fifos,
1816                 .maxlen         = sizeof(int),
1817                 .mode           = 0600,
1818                 .proc_handler   = proc_dointvec_minmax,
1819                 .extra1         = &zero,
1820                 .extra2         = &two,
1821         },
1822         {
1823                 .procname       = "protected_regular",
1824                 .data           = &sysctl_protected_regular,
1825                 .maxlen         = sizeof(int),
1826                 .mode           = 0600,
1827                 .proc_handler   = proc_dointvec_minmax,
1828                 .extra1         = &zero,
1829                 .extra2         = &two,
1830         },
1831         {
1832                 .procname       = "suid_dumpable",
1833                 .data           = &suid_dumpable,
1834                 .maxlen         = sizeof(int),
1835                 .mode           = 0644,
1836                 .proc_handler   = proc_dointvec_minmax_coredump,
1837                 .extra1         = &zero,
1838                 .extra2         = &two,
1839         },
1840 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1841         {
1842                 .procname       = "binfmt_misc",
1843                 .mode           = 0555,
1844                 .child          = sysctl_mount_point,
1845         },
1846 #endif
1847         {
1848                 .procname       = "pipe-max-size",
1849                 .data           = &pipe_max_size,
1850                 .maxlen         = sizeof(pipe_max_size),
1851                 .mode           = 0644,
1852                 .proc_handler   = proc_dopipe_max_size,
1853         },
1854         {
1855                 .procname       = "pipe-user-pages-hard",
1856                 .data           = &pipe_user_pages_hard,
1857                 .maxlen         = sizeof(pipe_user_pages_hard),
1858                 .mode           = 0644,
1859                 .proc_handler   = proc_doulongvec_minmax,
1860         },
1861         {
1862                 .procname       = "pipe-user-pages-soft",
1863                 .data           = &pipe_user_pages_soft,
1864                 .maxlen         = sizeof(pipe_user_pages_soft),
1865                 .mode           = 0644,
1866                 .proc_handler   = proc_doulongvec_minmax,
1867         },
1868         {
1869                 .procname       = "mount-max",
1870                 .data           = &sysctl_mount_max,
1871                 .maxlen         = sizeof(unsigned int),
1872                 .mode           = 0644,
1873                 .proc_handler   = proc_dointvec_minmax,
1874                 .extra1         = &one,
1875         },
1876         { }
1877 };
1878
1879 static struct ctl_table debug_table[] = {
1880 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1881         {
1882                 .procname       = "exception-trace",
1883                 .data           = &show_unhandled_signals,
1884                 .maxlen         = sizeof(int),
1885                 .mode           = 0644,
1886                 .proc_handler   = proc_dointvec
1887         },
1888 #endif
1889 #if defined(CONFIG_OPTPROBES)
1890         {
1891                 .procname       = "kprobes-optimization",
1892                 .data           = &sysctl_kprobes_optimization,
1893                 .maxlen         = sizeof(int),
1894                 .mode           = 0644,
1895                 .proc_handler   = proc_kprobes_optimization_handler,
1896                 .extra1         = &zero,
1897                 .extra2         = &one,
1898         },
1899 #endif
1900         { }
1901 };
1902
1903 static struct ctl_table dev_table[] = {
1904         { }
1905 };
1906
1907 int __init sysctl_init(void)
1908 {
1909         struct ctl_table_header *hdr;
1910
1911         hdr = register_sysctl_table(sysctl_base_table);
1912         kmemleak_not_leak(hdr);
1913         return 0;
1914 }
1915
1916 #endif /* CONFIG_SYSCTL */
1917
1918 /*
1919  * /proc/sys support
1920  */
1921
1922 #ifdef CONFIG_PROC_SYSCTL
1923
1924 static int _proc_do_string(char *data, int maxlen, int write,
1925                            char __user *buffer,
1926                            size_t *lenp, loff_t *ppos)
1927 {
1928         size_t len;
1929         char __user *p;
1930         char c;
1931
1932         if (!data || !maxlen || !*lenp) {
1933                 *lenp = 0;
1934                 return 0;
1935         }
1936
1937         if (write) {
1938                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1939                         /* Only continue writes not past the end of buffer. */
1940                         len = strlen(data);
1941                         if (len > maxlen - 1)
1942                                 len = maxlen - 1;
1943
1944                         if (*ppos > len)
1945                                 return 0;
1946                         len = *ppos;
1947                 } else {
1948                         /* Start writing from beginning of buffer. */
1949                         len = 0;
1950                 }
1951
1952                 *ppos += *lenp;
1953                 p = buffer;
1954                 while ((p - buffer) < *lenp && len < maxlen - 1) {
1955                         if (get_user(c, p++))
1956                                 return -EFAULT;
1957                         if (c == 0 || c == '\n')
1958                                 break;
1959                         data[len++] = c;
1960                 }
1961                 data[len] = 0;
1962         } else {
1963                 len = strlen(data);
1964                 if (len > maxlen)
1965                         len = maxlen;
1966
1967                 if (*ppos > len) {
1968                         *lenp = 0;
1969                         return 0;
1970                 }
1971
1972                 data += *ppos;
1973                 len  -= *ppos;
1974
1975                 if (len > *lenp)
1976                         len = *lenp;
1977                 if (len)
1978                         if (copy_to_user(buffer, data, len))
1979                                 return -EFAULT;
1980                 if (len < *lenp) {
1981                         if (put_user('\n', buffer + len))
1982                                 return -EFAULT;
1983                         len++;
1984                 }
1985                 *lenp = len;
1986                 *ppos += len;
1987         }
1988         return 0;
1989 }
1990
1991 static void warn_sysctl_write(struct ctl_table *table)
1992 {
1993         pr_warn_once("%s wrote to %s when file position was not 0!\n"
1994                 "This will not be supported in the future. To silence this\n"
1995                 "warning, set kernel.sysctl_writes_strict = -1\n",
1996                 current->comm, table->procname);
1997 }
1998
1999 /**
2000  * proc_first_pos_non_zero_ignore - check if first position is allowed
2001  * @ppos: file position
2002  * @table: the sysctl table
2003  *
2004  * Returns true if the first position is non-zero and the sysctl_writes_strict
2005  * mode indicates this is not allowed for numeric input types. String proc
2006  * handlers can ignore the return value.
2007  */
2008 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2009                                            struct ctl_table *table)
2010 {
2011         if (!*ppos)
2012                 return false;
2013
2014         switch (sysctl_writes_strict) {
2015         case SYSCTL_WRITES_STRICT:
2016                 return true;
2017         case SYSCTL_WRITES_WARN:
2018                 warn_sysctl_write(table);
2019                 return false;
2020         default:
2021                 return false;
2022         }
2023 }
2024
2025 /**
2026  * proc_dostring - read a string sysctl
2027  * @table: the sysctl table
2028  * @write: %TRUE if this is a write to the sysctl file
2029  * @buffer: the user buffer
2030  * @lenp: the size of the user buffer
2031  * @ppos: file position
2032  *
2033  * Reads/writes a string from/to the user buffer. If the kernel
2034  * buffer provided is not large enough to hold the string, the
2035  * string is truncated. The copied string is %NULL-terminated.
2036  * If the string is being read by the user process, it is copied
2037  * and a newline '\n' is added. It is truncated if the buffer is
2038  * not large enough.
2039  *
2040  * Returns 0 on success.
2041  */
2042 int proc_dostring(struct ctl_table *table, int write,
2043                   void __user *buffer, size_t *lenp, loff_t *ppos)
2044 {
2045         if (write)
2046                 proc_first_pos_non_zero_ignore(ppos, table);
2047
2048         return _proc_do_string((char *)(table->data), table->maxlen, write,
2049                                (char __user *)buffer, lenp, ppos);
2050 }
2051
/*
 * Advance *buf past whatever skip_spaces() skips and return how many
 * characters were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);

	return *buf - start;
}
2060
/*
 * Skip over a run of the character @v at the front of *buf, consuming at
 * most *size characters.  Both *buf and *size are updated in place.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		--*size;
		++*buf;
	}
}
2070
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 characters are examined.  An optional leading '-'
 * (only honoured when more input follows it) sets @neg; the magnitude is
 * stored in @val.  If a character follows the number it must be one of
 * @perm_tr (when @perm_tr_len is non-zero) and is reported through @tr
 * when @tr is non-NULL.
 *
 * On success %0 is returned and @buf and @size are advanced past the
 * characters consumed; otherwise -EINVAL is returned.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char scratch[TMPBUFLEN];
	char *cur = scratch;
	int used;

	if (!*size)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated private copy of the input. */
	used = *size;
	if (used > TMPBUFLEN - 1)
		used = TMPBUFLEN - 1;

	memcpy(scratch, *buf, used);
	scratch[used] = 0;

	*neg = (*cur == '-' && *size > 1);
	if (*neg)
		cur++;
	if (!isdigit(*cur))
		return -EINVAL;

	*val = simple_strtoul(cur, &cur, 0);

	used = cur - scratch;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (used == TMPBUFLEN - 1)
		return -EINVAL;

	if (used < *size) {
		/* Something follows the number: validate and report it. */
		if (perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
			return -EINVAL;
		if (tr)
			*tr = *cur;
	}

	*buf += used;
	*size -= used;

	return 0;
}
2135
2136 /**
2137  * proc_put_long - converts an integer to a decimal ASCII formatted string
2138  *
2139  * @buf: the user buffer
2140  * @size: the size of the user buffer
2141  * @val: the integer to be converted
2142  * @neg: sign of the number, %TRUE for negative
2143  *
2144  * In case of success %0 is returned and @buf and @size are updated with
2145  * the amount of bytes written.
2146  */
2147 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2148                           bool neg)
2149 {
2150         int len;
2151         char tmp[TMPBUFLEN], *p = tmp;
2152
2153         sprintf(p, "%s%lu", neg ? "-" : "", val);
2154         len = strlen(tmp);
2155         if (len > *size)
2156                 len = *size;
2157         if (copy_to_user(*buf, tmp, len))
2158                 return -EFAULT;
2159         *size -= len;
2160         *buf += len;
2161         return 0;
2162 }
2163 #undef TMPBUFLEN
2164
2165 static int proc_put_char(void __user **buf, size_t *size, char c)
2166 {
2167         if (*size) {
2168                 char __user **buffer = (char __user **)buf;
2169                 if (put_user(c, *buffer))
2170                         return -EFAULT;
2171                 (*size)--, (*buffer)++;
2172                 *buf = *buffer;
2173         }
2174         return 0;
2175 }
2176
/*
 * Default converter between the (sign, magnitude) pair used by the parser
 * and a plain int sysctl value.
 *
 * write != 0: combine *negp and *lvalp into *valp; magnitudes beyond what
 *             an int can hold (INT_MAX, or INT_MAX + 1 when negative) are
 *             rejected with -EINVAL.
 * write == 0: split *valp into *negp and the magnitude *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long limit;
	int cur;

	if (!write) {
		cur = *valp;
		*negp = cur < 0;
		/* Negate in unsigned arithmetic so INT_MIN is handled too. */
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	limit = (unsigned long) INT_MAX;
	if (*negp)
		limit += 1;
	if (*lvalp > limit)
		return -EINVAL;

	*valp = *negp ? -*lvalp : *lvalp;

	return 0;
}
2203
/*
 * Default converter between the parser's unsigned long and an unsigned int
 * sysctl value.
 *
 * write != 0: store *lvalp into *valp, rejecting values above UINT_MAX
 *             with -EINVAL.
 * write == 0: widen *valp into *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;

	return 0;
}
2218
2219 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2220
2221 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2222                   int write, void __user *buffer,
2223                   size_t *lenp, loff_t *ppos,
2224                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2225                               int write, void *data),
2226                   void *data)
2227 {
2228         int *i, vleft, first = 1, err = 0;
2229         size_t left;
2230         char *kbuf = NULL, *p;
2231         
2232         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2233                 *lenp = 0;
2234                 return 0;
2235         }
2236         
2237         i = (int *) tbl_data;
2238         vleft = table->maxlen / sizeof(*i);
2239         left = *lenp;
2240
2241         if (!conv)
2242                 conv = do_proc_dointvec_conv;
2243
2244         if (write) {
2245                 if (proc_first_pos_non_zero_ignore(ppos, table))
2246                         goto out;
2247
2248                 if (left > PAGE_SIZE - 1)
2249                         left = PAGE_SIZE - 1;
2250                 p = kbuf = memdup_user_nul(buffer, left);
2251                 if (IS_ERR(kbuf))
2252                         return PTR_ERR(kbuf);
2253         }
2254
2255         for (; left && vleft--; i++, first=0) {
2256                 unsigned long lval;
2257                 bool neg;
2258
2259                 if (write) {
2260                         left -= proc_skip_spaces(&p);
2261
2262                         if (!left)
2263                                 break;
2264                         err = proc_get_long(&p, &left, &lval, &neg,
2265                                              proc_wspace_sep,
2266                                              sizeof(proc_wspace_sep), NULL);
2267                         if (err)
2268                                 break;
2269                         if (conv(&neg, &lval, i, 1, data)) {
2270                                 err = -EINVAL;
2271                                 break;
2272                         }
2273                 } else {
2274                         if (conv(&neg, &lval, i, 0, data)) {
2275                                 err = -EINVAL;
2276                                 break;
2277                         }
2278                         if (!first)
2279                                 err = proc_put_char(&buffer, &left, '\t');
2280                         if (err)
2281                                 break;
2282                         err = proc_put_long(&buffer, &left, lval, neg);
2283                         if (err)
2284                                 break;
2285                 }
2286         }
2287
2288         if (!write && !first && left && !err)
2289                 err = proc_put_char(&buffer, &left, '\n');
2290         if (write && !err && left)
2291                 left -= proc_skip_spaces(&p);
2292         if (write) {
2293                 kfree(kbuf);
2294                 if (first)
2295                         return err ? : -EINVAL;
2296         }
2297         *lenp -= left;
2298 out:
2299         *ppos += *lenp;
2300         return err;
2301 }
2302
2303 static int do_proc_dointvec(struct ctl_table *table, int write,
2304                   void __user *buffer, size_t *lenp, loff_t *ppos,
2305                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2306                               int write, void *data),
2307                   void *data)
2308 {
2309         return __do_proc_dointvec(table->data, table, write,
2310                         buffer, lenp, ppos, conv, data);
2311 }
2312
2313 static int do_proc_douintvec_w(unsigned int *tbl_data,
2314                                struct ctl_table *table,
2315                                void __user *buffer,
2316                                size_t *lenp, loff_t *ppos,
2317                                int (*conv)(unsigned long *lvalp,
2318                                            unsigned int *valp,
2319                                            int write, void *data),
2320                                void *data)
2321 {
2322         unsigned long lval;
2323         int err = 0;
2324         size_t left;
2325         bool neg;
2326         char *kbuf = NULL, *p;
2327
2328         left = *lenp;
2329
2330         if (proc_first_pos_non_zero_ignore(ppos, table))
2331                 goto bail_early;
2332
2333         if (left > PAGE_SIZE - 1)
2334                 left = PAGE_SIZE - 1;
2335
2336         p = kbuf = memdup_user_nul(buffer, left);
2337         if (IS_ERR(kbuf))
2338                 return -EINVAL;
2339
2340         left -= proc_skip_spaces(&p);
2341         if (!left) {
2342                 err = -EINVAL;
2343                 goto out_free;
2344         }
2345
2346         err = proc_get_long(&p, &left, &lval, &neg,
2347                              proc_wspace_sep,
2348                              sizeof(proc_wspace_sep), NULL);
2349         if (err || neg) {
2350                 err = -EINVAL;
2351                 goto out_free;
2352         }
2353
2354         if (conv(&lval, tbl_data, 1, data)) {
2355                 err = -EINVAL;
2356                 goto out_free;
2357         }
2358
2359         if (!err && left)
2360                 left -= proc_skip_spaces(&p);
2361
2362 out_free:
2363         kfree(kbuf);
2364         if (err)
2365                 return -EINVAL;
2366
2367         return 0;
2368
2369         /* This is in keeping with old __do_proc_dointvec() */
2370 bail_early:
2371         *ppos += *lenp;
2372         return err;
2373 }
2374
2375 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2376                                size_t *lenp, loff_t *ppos,
2377                                int (*conv)(unsigned long *lvalp,
2378                                            unsigned int *valp,
2379                                            int write, void *data),
2380                                void *data)
2381 {
2382         unsigned long lval;
2383         int err = 0;
2384         size_t left;
2385
2386         left = *lenp;
2387
2388         if (conv(&lval, tbl_data, 0, data)) {
2389                 err = -EINVAL;
2390                 goto out;
2391         }
2392
2393         err = proc_put_long(&buffer, &left, lval, false);
2394         if (err || !left)
2395                 goto out;
2396
2397         err = proc_put_char(&buffer, &left, '\n');
2398
2399 out:
2400         *lenp -= left;
2401         *ppos += *lenp;
2402
2403         return err;
2404 }
2405
2406 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2407                                int write, void __user *buffer,
2408                                size_t *lenp, loff_t *ppos,
2409                                int (*conv)(unsigned long *lvalp,
2410                                            unsigned int *valp,
2411                                            int write, void *data),
2412                                void *data)
2413 {
2414         unsigned int *i, vleft;
2415
2416         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2417                 *lenp = 0;
2418                 return 0;
2419         }
2420
2421         i = (unsigned int *) tbl_data;
2422         vleft = table->maxlen / sizeof(*i);
2423
2424         /*
2425          * Arrays are not supported, keep this simple. *Do not* add
2426          * support for them.
2427          */
2428         if (vleft != 1) {
2429                 *lenp = 0;
2430                 return -EINVAL;
2431         }
2432
2433         if (!conv)
2434                 conv = do_proc_douintvec_conv;
2435
2436         if (write)
2437                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2438                                            conv, data);
2439         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2440 }
2441
2442 static int do_proc_douintvec(struct ctl_table *table, int write,
2443                              void __user *buffer, size_t *lenp, loff_t *ppos,
2444                              int (*conv)(unsigned long *lvalp,
2445                                          unsigned int *valp,
2446                                          int write, void *data),
2447                              void *data)
2448 {
2449         return __do_proc_douintvec(table->data, table, write,
2450                                    buffer, lenp, ppos, conv, data);
2451 }
2452
2453 /**
2454  * proc_dointvec - read a vector of integers
2455  * @table: the sysctl table
2456  * @write: %TRUE if this is a write to the sysctl file
2457  * @buffer: the user buffer
2458  * @lenp: the size of the user buffer
2459  * @ppos: file position
2460  *
2461  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2462  * values from/to the user buffer, treated as an ASCII string. 
2463  *
2464  * Returns 0 on success.
2465  */
2466 int proc_dointvec(struct ctl_table *table, int write,
2467                      void __user *buffer, size_t *lenp, loff_t *ppos)
2468 {
2469         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2470 }
2471
2472 /**
2473  * proc_douintvec - read a vector of unsigned integers
2474  * @table: the sysctl table
2475  * @write: %TRUE if this is a write to the sysctl file
2476  * @buffer: the user buffer
2477  * @lenp: the size of the user buffer
2478  * @ppos: file position
2479  *
2480  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2481  * values from/to the user buffer, treated as an ASCII string.
2482  *
2483  * Returns 0 on success.
2484  */
2485 int proc_douintvec(struct ctl_table *table, int write,
2486                      void __user *buffer, size_t *lenp, loff_t *ppos)
2487 {
2488         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2489                                  do_proc_douintvec_conv, NULL);
2490 }
2491
2492 /*
2493  * Taint values can only be increased
2494  * This means we can safely use a temporary.
2495  */
2496 static int proc_taint(struct ctl_table *table, int write,
2497                                void __user *buffer, size_t *lenp, loff_t *ppos)
2498 {
2499         struct ctl_table t;
2500         unsigned long tmptaint = get_taint();
2501         int err;
2502
2503         if (write && !capable(CAP_SYS_ADMIN))
2504                 return -EPERM;
2505
2506         t = *table;
2507         t.data = &tmptaint;
2508         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2509         if (err < 0)
2510                 return err;
2511
2512         if (write) {
2513                 /*
2514                  * Poor man's atomic or. Not worth adding a primitive
2515                  * to everyone's atomic.h for this
2516                  */
2517                 int i;
2518                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2519                         if ((tmptaint >> i) & 1)
2520                                 add_taint(i, LOCKDEP_STILL_OK);
2521                 }
2522         }
2523
2524         return err;
2525 }
2526
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() variant whose writes are restricted to callers
 * holding CAP_SYS_ADMIN; reads are passed straight through.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (!write || capable(CAP_SYS_ADMIN))
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	return -EPERM;
}
#endif
2537
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback with range checking for signed ints.
 *
 * Write: combine the sign in *@negp and the magnitude in *@lvalp into an
 * int, reject magnitudes that do not fit in an int and values outside the
 * optional [*min, *max] window with -EINVAL, then store into *@valp.
 * A NULL min or max pointer disables that bound.
 *
 * Read: split *@valp into sign (*@negp) and magnitude (*@lvalp).
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *range = data;
	unsigned long limit;
	int candidate;

	if (!write) {
		int cur = *valp;

		*negp = cur < 0;
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	/* A negative value may have a magnitude one larger than INT_MAX. */
	limit = (unsigned long) INT_MAX;
	if (*negp)
		limit += 1;
	if (*lvalp > limit)
		return -EINVAL;

	candidate = *negp ? -*lvalp : *lvalp;

	if (range->min && candidate < *range->min)
		return -EINVAL;
	if (range->max && candidate > *range->max)
		return -EINVAL;

	*valp = candidate;
	return 0;
}
2584
2585 /**
2586  * proc_dointvec_minmax - read a vector of integers with min/max values
2587  * @table: the sysctl table
2588  * @write: %TRUE if this is a write to the sysctl file
2589  * @buffer: the user buffer
2590  * @lenp: the size of the user buffer
2591  * @ppos: file position
2592  *
2593  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2594  * values from/to the user buffer, treated as an ASCII string.
2595  *
2596  * This routine will ensure the values are within the range specified by
2597  * table->extra1 (min) and table->extra2 (max).
2598  *
2599  * Returns 0 on success or -EINVAL on write when the range check fails.
2600  */
2601 int proc_dointvec_minmax(struct ctl_table *table, int write,
2602                   void __user *buffer, size_t *lenp, loff_t *ppos)
2603 {
2604         struct do_proc_dointvec_minmax_conv_param param = {
2605                 .min = (int *) table->extra1,
2606                 .max = (int *) table->extra2,
2607         };
2608         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2609                                 do_proc_dointvec_minmax_conv, &param);
2610 }
2611
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Conversion callback with range checking for unsigned ints.
 *
 * Write: fail with -EINVAL if *@lvalp does not fit in an unsigned int,
 * with -ERANGE if it lies outside the optional [*min, *max] window
 * (a NULL bound is not checked), otherwise store it into *@valp.
 *
 * Read: widen *@valp into *@lvalp.
 *
 * Returns 0 on success.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *range = data;
	unsigned int narrowed;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;
	narrowed = *lvalp;

	if (range->min && narrowed < *range->min)
		return -ERANGE;
	if (range->max && narrowed > *range->max)
		return -ERANGE;

	*valp = narrowed;
	return 0;
}
2650
2651 /**
2652  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2653  * @table: the sysctl table
2654  * @write: %TRUE if this is a write to the sysctl file
2655  * @buffer: the user buffer
2656  * @lenp: the size of the user buffer
2657  * @ppos: file position
2658  *
2659  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2660  * values from/to the user buffer, treated as an ASCII string. Negative
2661  * strings are not allowed.
2662  *
2663  * This routine will ensure the values are within the range specified by
2664  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2665  * check for UINT_MAX to avoid having to support wrap around uses from
2666  * userspace.
2667  *
2668  * Returns 0 on success or -ERANGE on write when the range check fails.
2669  */
2670 int proc_douintvec_minmax(struct ctl_table *table, int write,
2671                           void __user *buffer, size_t *lenp, loff_t *ppos)
2672 {
2673         struct do_proc_douintvec_minmax_conv_param param = {
2674                 .min = (unsigned int *) table->extra1,
2675                 .max = (unsigned int *) table->extra2,
2676         };
2677         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2678                                  do_proc_douintvec_minmax_conv, &param);
2679 }
2680
2681 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2682                                         unsigned int *valp,
2683                                         int write, void *data)
2684 {
2685         if (write) {
2686                 unsigned int val;
2687
2688                 val = round_pipe_size(*lvalp);
2689                 if (val == 0)
2690                         return -EINVAL;
2691
2692                 *valp = val;
2693         } else {
2694                 unsigned int val = *valp;
2695                 *lvalp = (unsigned long) val;
2696         }
2697
2698         return 0;
2699 }
2700
2701 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2702                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2703 {
2704         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2705                                  do_proc_dopipe_max_size_conv, NULL);
2706 }
2707
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern starts with
 * neither '/' nor '|'. Compiles to an empty function without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2721
2722 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2723                 void __user *buffer, size_t *lenp, loff_t *ppos)
2724 {
2725         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2726         if (!error)
2727                 validate_coredump_safety();
2728         return error;
2729 }
2730
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, on success, by a re-check of the coredump
 * settings via validate_coredump_safety().
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dostring(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
#endif
2741
2742 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2743                                      void __user *buffer,
2744                                      size_t *lenp, loff_t *ppos,
2745                                      unsigned long convmul,
2746                                      unsigned long convdiv)
2747 {
2748         unsigned long *i, *min, *max;
2749         int vleft, first = 1, err = 0;
2750         size_t left;
2751         char *kbuf = NULL, *p;
2752
2753         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2754                 *lenp = 0;
2755                 return 0;
2756         }
2757
2758         i = (unsigned long *) data;
2759         min = (unsigned long *) table->extra1;
2760         max = (unsigned long *) table->extra2;
2761         vleft = table->maxlen / sizeof(unsigned long);
2762         left = *lenp;
2763
2764         if (write) {
2765                 if (proc_first_pos_non_zero_ignore(ppos, table))
2766                         goto out;
2767
2768                 if (left > PAGE_SIZE - 1)
2769                         left = PAGE_SIZE - 1;
2770                 p = kbuf = memdup_user_nul(buffer, left);
2771                 if (IS_ERR(kbuf))
2772                         return PTR_ERR(kbuf);
2773         }
2774
2775         for (; left && vleft--; i++, first = 0) {
2776                 unsigned long val;
2777
2778                 if (write) {
2779                         bool neg;
2780
2781                         left -= proc_skip_spaces(&p);
2782                         if (!left)
2783                                 break;
2784
2785                         err = proc_get_long(&p, &left, &val, &neg,
2786                                              proc_wspace_sep,
2787                                              sizeof(proc_wspace_sep), NULL);
2788                         if (err)
2789                                 break;
2790                         if (neg)
2791                                 continue;
2792                         val = convmul * val / convdiv;
2793                         if ((min && val < *min) || (max && val > *max))
2794                                 continue;
2795                         *i = val;
2796                 } else {
2797                         val = convdiv * (*i) / convmul;
2798                         if (!first) {
2799                                 err = proc_put_char(&buffer, &left, '\t');
2800                                 if (err)
2801                                         break;
2802                         }
2803                         err = proc_put_long(&buffer, &left, val, false);
2804                         if (err)
2805                                 break;
2806                 }
2807         }
2808
2809         if (!write && !first && left && !err)
2810                 err = proc_put_char(&buffer, &left, '\n');
2811         if (write && !err)
2812                 left -= proc_skip_spaces(&p);
2813         if (write) {
2814                 kfree(kbuf);
2815                 if (first)
2816                         return err ? : -EINVAL;
2817         }
2818         *lenp -= left;
2819 out:
2820         *ppos += *lenp;
2821         return err;
2822 }
2823
2824 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2825                                      void __user *buffer,
2826                                      size_t *lenp, loff_t *ppos,
2827                                      unsigned long convmul,
2828                                      unsigned long convdiv)
2829 {
2830         return __do_proc_doulongvec_minmax(table->data, table, write,
2831                         buffer, lenp, ppos, convmul, convdiv);
2832 }
2833
2834 /**
2835  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2836  * @table: the sysctl table
2837  * @write: %TRUE if this is a write to the sysctl file
2838  * @buffer: the user buffer
2839  * @lenp: the size of the user buffer
2840  * @ppos: file position
2841  *
2842  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2843  * values from/to the user buffer, treated as an ASCII string.
2844  *
2845  * This routine will ensure the values are within the range specified by
2846  * table->extra1 (min) and table->extra2 (max).
2847  *
2848  * Returns 0 on success.
2849  */
2850 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2851                            void __user *buffer, size_t *lenp, loff_t *ppos)
2852 {
2853     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2854 }
2855
2856 /**
2857  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2858  * @table: the sysctl table
2859  * @write: %TRUE if this is a write to the sysctl file
2860  * @buffer: the user buffer
2861  * @lenp: the size of the user buffer
2862  * @ppos: file position
2863  *
2864  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2865  * values from/to the user buffer, treated as an ASCII string. The values
2866  * are treated as milliseconds, and converted to jiffies when they are stored.
2867  *
2868  * This routine will ensure the values are within the range specified by
2869  * table->extra1 (min) and table->extra2 (max).
2870  *
2871  * Returns 0 on success.
2872  */
2873 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2874                                       void __user *buffer,
2875                                       size_t *lenp, loff_t *ppos)
2876 {
2877     return do_proc_doulongvec_minmax(table, write, buffer,
2878                                      lenp, ppos, HZ, 1000l);
2879 }
2880
2881
2882 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2883                                          int *valp,
2884                                          int write, void *data)
2885 {
2886         if (write) {
2887                 if (*lvalp > INT_MAX / HZ)
2888                         return 1;
2889                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2890         } else {
2891                 int val = *valp;
2892                 unsigned long lval;
2893                 if (val < 0) {
2894                         *negp = true;
2895                         lval = -(unsigned long)val;
2896                 } else {
2897                         *negp = false;
2898                         lval = (unsigned long)val;
2899                 }
2900                 *lvalp = lval / HZ;
2901         }
2902         return 0;
2903 }
2904
2905 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2906                                                 int *valp,
2907                                                 int write, void *data)
2908 {
2909         if (write) {
2910                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2911                         return 1;
2912                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2913         } else {
2914                 int val = *valp;
2915                 unsigned long lval;
2916                 if (val < 0) {
2917                         *negp = true;
2918                         lval = -(unsigned long)val;
2919                 } else {
2920                         *negp = false;
2921                         lval = (unsigned long)val;
2922                 }
2923                 *lvalp = jiffies_to_clock_t(lval);
2924         }
2925         return 0;
2926 }
2927
2928 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2929                                             int *valp,
2930                                             int write, void *data)
2931 {
2932         if (write) {
2933                 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2934
2935                 if (jif > INT_MAX)
2936                         return 1;
2937                 *valp = (int)jif;
2938         } else {
2939                 int val = *valp;
2940                 unsigned long lval;
2941                 if (val < 0) {
2942                         *negp = true;
2943                         lval = -(unsigned long)val;
2944                 } else {
2945                         *negp = false;
2946                         lval = (unsigned long)val;
2947                 }
2948                 *lvalp = jiffies_to_msecs(lval);
2949         }
2950         return 0;
2951 }
2952
2953 /**
2954  * proc_dointvec_jiffies - read a vector of integers as seconds
2955  * @table: the sysctl table
2956  * @write: %TRUE if this is a write to the sysctl file
2957  * @buffer: the user buffer
2958  * @lenp: the size of the user buffer
2959  * @ppos: file position
2960  *
2961  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2962  * values from/to the user buffer, treated as an ASCII string. 
2963  * The values read are assumed to be in seconds, and are converted into
2964  * jiffies.
2965  *
2966  * Returns 0 on success.
2967  */
2968 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2969                           void __user *buffer, size_t *lenp, loff_t *ppos)
2970 {
2971     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2972                             do_proc_dointvec_jiffies_conv,NULL);
2973 }
2974
2975 /**
2976  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2977  * @table: the sysctl table
2978  * @write: %TRUE if this is a write to the sysctl file
2979  * @buffer: the user buffer
2980  * @lenp: the size of the user buffer
2981  * @ppos: pointer to the file position
2982  *
2983  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2984  * values from/to the user buffer, treated as an ASCII string. 
2985  * The values read are assumed to be in 1/USER_HZ seconds, and 
2986  * are converted into jiffies.
2987  *
2988  * Returns 0 on success.
2989  */
2990 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2991                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2992 {
2993     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2994                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2995 }
2996
2997 /**
2998  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2999  * @table: the sysctl table
3000  * @write: %TRUE if this is a write to the sysctl file
3001  * @buffer: the user buffer
3002  * @lenp: the size of the user buffer
3003  * @ppos: file position
3004  * @ppos: the current position in the file
3005  *
3006  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3007  * values from/to the user buffer, treated as an ASCII string. 
3008  * The values read are assumed to be in 1/1000 seconds, and 
3009  * are converted into jiffies.
3010  *
3011  * Returns 0 on success.
3012  */
3013 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3014                              void __user *buffer, size_t *lenp, loff_t *ppos)
3015 {
3016         return do_proc_dointvec(table, write, buffer, lenp, ppos,
3017                                 do_proc_dointvec_ms_jiffies_conv, NULL);
3018 }
3019
3020 static int proc_do_cad_pid(struct ctl_table *table, int write,
3021                            void __user *buffer, size_t *lenp, loff_t *ppos)
3022 {
3023         struct pid *new_pid;
3024         pid_t tmp;
3025         int r;
3026
3027         tmp = pid_vnr(cad_pid);
3028
3029         r = __do_proc_dointvec(&tmp, table, write, buffer,
3030                                lenp, ppos, NULL, NULL);
3031         if (r || !write)
3032                 return r;
3033
3034         new_pid = find_get_pid(tmp);
3035         if (!new_pid)
3036                 return -ESRCH;
3037
3038         put_pid(xchg(&cad_pid, new_pid));
3039         return 0;
3040 }
3041
3042 /**
3043  * proc_do_large_bitmap - read/write from/to a large bitmap
3044  * @table: the sysctl table
3045  * @write: %TRUE if this is a write to the sysctl file
3046  * @buffer: the user buffer
3047  * @lenp: the size of the user buffer
3048  * @ppos: file position
3049  *
3050  * The bitmap is stored at table->data and the bitmap length (in bits)
3051  * in table->maxlen.
3052  *
3053  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3054  * large bitmaps may be represented in a compact manner. Writing into
3055  * the file will clear the bitmap then update it with the given input.
3056  *
3057  * Returns 0 on success.
3058  */
3059 int proc_do_large_bitmap(struct ctl_table *table, int write,
3060                          void __user *buffer, size_t *lenp, loff_t *ppos)
3061 {
3062         int err = 0;
3063         bool first = 1;
3064         size_t left = *lenp;
3065         unsigned long bitmap_len = table->maxlen;
3066         unsigned long *bitmap = *(unsigned long **) table->data;
3067         unsigned long *tmp_bitmap = NULL;
3068         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3069
3070         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3071                 *lenp = 0;
3072                 return 0;
3073         }
3074
3075         if (write) {
3076                 char *kbuf, *p;
3077
3078                 if (left > PAGE_SIZE - 1)
3079                         left = PAGE_SIZE - 1;
3080
3081                 p = kbuf = memdup_user_nul(buffer, left);
3082                 if (IS_ERR(kbuf))
3083                         return PTR_ERR(kbuf);
3084
3085                 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3086                                      sizeof(unsigned long),
3087                                      GFP_KERNEL);
3088                 if (!tmp_bitmap) {
3089                         kfree(kbuf);
3090                         return -ENOMEM;
3091                 }
3092                 proc_skip_char(&p, &left, '\n');
3093                 while (!err && left) {
3094                         unsigned long val_a, val_b;
3095                         bool neg;
3096
3097                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3098                                              sizeof(tr_a), &c);
3099                         if (err)
3100                                 break;
3101                         if (val_a >= bitmap_len || neg) {
3102                                 err = -EINVAL;
3103                                 break;
3104                         }
3105
3106                         val_b = val_a;
3107                         if (left) {
3108                                 p++;
3109                                 left--;
3110                         }
3111
3112                         if (c == '-') {
3113                                 err = proc_get_long(&p, &left, &val_b,
3114                                                      &neg, tr_b, sizeof(tr_b),
3115                                                      &c);
3116                                 if (err)
3117                                         break;
3118                                 if (val_b >= bitmap_len || neg ||
3119                                     val_a > val_b) {
3120                                         err = -EINVAL;
3121                                         break;
3122                                 }
3123                                 if (left) {
3124                                         p++;
3125                                         left--;
3126                                 }
3127                         }
3128
3129                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3130                         first = 0;
3131                         proc_skip_char(&p, &left, '\n');
3132                 }
3133                 kfree(kbuf);
3134         } else {
3135                 unsigned long bit_a, bit_b = 0;
3136
3137                 while (left) {
3138                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3139                         if (bit_a >= bitmap_len)
3140                                 break;
3141                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
3142                                                    bit_a + 1) - 1;
3143
3144                         if (!first) {
3145                                 err = proc_put_char(&buffer, &left, ',');
3146                                 if (err)
3147                                         break;
3148                         }
3149                         err = proc_put_long(&buffer, &left, bit_a, false);
3150                         if (err)
3151                                 break;
3152                         if (bit_a != bit_b) {
3153                                 err = proc_put_char(&buffer, &left, '-');
3154                                 if (err)
3155                                         break;
3156                                 err = proc_put_long(&buffer, &left, bit_b, false);
3157                                 if (err)
3158                                         break;
3159                         }
3160
3161                         first = 0; bit_b++;
3162                 }
3163                 if (!err)
3164                         err = proc_put_char(&buffer, &left, '\n');
3165         }
3166
3167         if (!err) {
3168                 if (write) {
3169                         if (*ppos)
3170                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3171                         else
3172                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3173                 }
3174                 *lenp -= left;
3175                 *ppos += *lenp;
3176         }
3177
3178         kfree(tmp_bitmap);
3179         return err;
3180 }
3181
3182 #else /* CONFIG_PROC_SYSCTL */
3183
3184 int proc_dostring(struct ctl_table *table, int write,
3185                   void __user *buffer, size_t *lenp, loff_t *ppos)
3186 {
3187         return -ENOSYS;
3188 }
3189
3190 int proc_dointvec(struct ctl_table *table, int write,
3191                   void __user *buffer, size_t *lenp, loff_t *ppos)
3192 {
3193         return -ENOSYS;
3194 }
3195
3196 int proc_douintvec(struct ctl_table *table, int write,
3197                   void __user *buffer, size_t *lenp, loff_t *ppos)
3198 {
3199         return -ENOSYS;
3200 }
3201
3202 int proc_dointvec_minmax(struct ctl_table *table, int write,
3203                     void __user *buffer, size_t *lenp, loff_t *ppos)
3204 {
3205         return -ENOSYS;
3206 }
3207
3208 int proc_douintvec_minmax(struct ctl_table *table, int write,
3209                           void __user *buffer, size_t *lenp, loff_t *ppos)
3210 {
3211         return -ENOSYS;
3212 }
3213
3214 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3215                     void __user *buffer, size_t *lenp, loff_t *ppos)
3216 {
3217         return -ENOSYS;
3218 }
3219
3220 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3221                     void __user *buffer, size_t *lenp, loff_t *ppos)
3222 {
3223         return -ENOSYS;
3224 }
3225
3226 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3227                              void __user *buffer, size_t *lenp, loff_t *ppos)
3228 {
3229         return -ENOSYS;
3230 }
3231
3232 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3233                     void __user *buffer, size_t *lenp, loff_t *ppos)
3234 {
3235         return -ENOSYS;
3236 }
3237
3238 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3239                                       void __user *buffer,
3240                                       size_t *lenp, loff_t *ppos)
3241 {
3242     return -ENOSYS;
3243 }
3244
3245
3246 #endif /* CONFIG_PROC_SYSCTL */
3247
/*
 * The handlers exported below are defined in both branches of the
 * CONFIG_PROC_SYSCTL conditional, so they are exported once here rather
 * than after each definition.
 */

/* String handler */
EXPORT_SYMBOL(proc_dostring);

/* Signed int handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);

/* Unsigned int handlers */
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);

/* Unsigned long handlers */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);