linux/kernel/sysctl.c
<<
>>
Prefs
   1/*
   2 * sysctl.c: General linux system control interface
   3 *
   4 * Begun 24 March 1995, Stephen Tweedie
   5 * Added /proc support, Dec 1995
   6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
   9 * Dynamic registration fixes, Stephen Tweedie.
  10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  12 *  Horn.
  13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  16 *  Wendling.
  17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  18 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/aio.h>
  23#include <linux/mm.h>
  24#include <linux/swap.h>
  25#include <linux/slab.h>
  26#include <linux/sysctl.h>
  27#include <linux/bitmap.h>
  28#include <linux/signal.h>
  29#include <linux/printk.h>
  30#include <linux/proc_fs.h>
  31#include <linux/security.h>
  32#include <linux/ctype.h>
  33#include <linux/kmemleak.h>
  34#include <linux/fs.h>
  35#include <linux/init.h>
  36#include <linux/kernel.h>
  37#include <linux/kobject.h>
  38#include <linux/net.h>
  39#include <linux/sysrq.h>
  40#include <linux/highuid.h>
  41#include <linux/writeback.h>
  42#include <linux/ratelimit.h>
  43#include <linux/compaction.h>
  44#include <linux/hugetlb.h>
  45#include <linux/initrd.h>
  46#include <linux/key.h>
  47#include <linux/times.h>
  48#include <linux/limits.h>
  49#include <linux/dcache.h>
  50#include <linux/dnotify.h>
  51#include <linux/syscalls.h>
  52#include <linux/vmstat.h>
  53#include <linux/nfs_fs.h>
  54#include <linux/acpi.h>
  55#include <linux/reboot.h>
  56#include <linux/ftrace.h>
  57#include <linux/perf_event.h>
  58#include <linux/kprobes.h>
  59#include <linux/pipe_fs_i.h>
  60#include <linux/oom.h>
  61#include <linux/kmod.h>
  62#include <linux/capability.h>
  63#include <linux/binfmts.h>
  64#include <linux/sched/sysctl.h>
  65#include <linux/sched/coredump.h>
  66#include <linux/kexec.h>
  67#include <linux/bpf.h>
  68#include <linux/mount.h>
  69
  70#include "../lib/kstrtox.h"
  71
  72#include <linux/uaccess.h>
  73#include <asm/processor.h>
  74
  75#ifdef CONFIG_X86
  76#include <asm/nmi.h>
  77#include <asm/stacktrace.h>
  78#include <asm/io.h>
  79#endif
  80#ifdef CONFIG_SPARC
  81#include <asm/setup.h>
  82#endif
  83#ifdef CONFIG_BSD_PROCESS_ACCT
  84#include <linux/acct.h>
  85#endif
  86#ifdef CONFIG_RT_MUTEXES
  87#include <linux/rtmutex.h>
  88#endif
  89#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
  90#include <linux/lockdep.h>
  91#endif
  92#ifdef CONFIG_CHR_DEV_SG
  93#include <scsi/sg.h>
  94#endif
  95#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
  96#include <linux/stackleak.h>
  97#endif
  98#ifdef CONFIG_LOCKUP_DETECTOR
  99#include <linux/nmi.h>
 100#endif
 101
 102#if defined(CONFIG_SYSCTL)
 103
 104/* External variables not in a header file. */
 105extern int suid_dumpable;
 106#ifdef CONFIG_COREDUMP
 107extern int core_uses_pid;
 108extern char core_pattern[];
 109extern unsigned int core_pipe_limit;
 110#endif
 111extern int pid_max;
 112extern int pid_max_min, pid_max_max;
 113extern int percpu_pagelist_fraction;
 114extern int latencytop_enabled;
 115extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
 116#ifndef CONFIG_MMU
 117extern int sysctl_nr_trim_pages;
 118#endif
 119
  120/* Constants used for minimum and maximum */
 121#ifdef CONFIG_LOCKUP_DETECTOR
 122static int sixty = 60;
 123#endif
 124
 125static int __maybe_unused neg_one = -1;
 126
 127static int zero;
 128static int __maybe_unused one = 1;
 129static int __maybe_unused two = 2;
 130static int __maybe_unused four = 4;
 131static unsigned long zero_ul;
 132static unsigned long one_ul = 1;
 133static unsigned long long_max = LONG_MAX;
 134static int one_hundred = 100;
 135static int one_thousand = 1000;
 136#ifdef CONFIG_PRINTK
 137static int ten_thousand = 10000;
 138#endif
 139#ifdef CONFIG_PERF_EVENTS
 140static int six_hundred_forty_kb = 640 * 1024;
 141#endif
 142
 143/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 144static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 145
 146/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 147static int maxolduid = 65535;
 148static int minolduid;
 149
 150static int ngroups_max = NGROUPS_MAX;
 151static const int cap_last_cap = CAP_LAST_CAP;
 152
 153/*
 154 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 155 * and hung_task_check_interval_secs
 156 */
 157#ifdef CONFIG_DETECT_HUNG_TASK
 158static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 159#endif
 160
 161#ifdef CONFIG_INOTIFY_USER
 162#include <linux/inotify.h>
 163#endif
 164#ifdef CONFIG_SPARC
 165#endif
 166
 167#ifdef __hppa__
 168extern int pwrsw_enabled;
 169#endif
 170
 171#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 172extern int unaligned_enabled;
 173#endif
 174
 175#ifdef CONFIG_IA64
 176extern int unaligned_dump_stack;
 177#endif
 178
 179#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
 180extern int no_unaligned_warning;
 181#endif
 182
 183#ifdef CONFIG_PROC_SYSCTL
 184
  185/**
  186 * enum sysctl_writes_mode - supported sysctl write modes
  187 *
  188 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
  189 *      to be written, and multiple writes on the same sysctl file descriptor
  190 *      will rewrite the sysctl value, regardless of file position. No warning
  191 *      is issued when the initial position is not 0.
  192 * @SYSCTL_WRITES_WARN: same as above, but warn when the initial file position
  193 *      is not 0.
  194 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
  195 *      file position 0 and the value must be fully contained in the buffer
  196 *      sent to the write syscall. When dealing with strings, respect the file
  197 *      position, but restrict this to the max length of the buffer; anything
  198 *      past the max length will be ignored. Multiple writes will append
  199 *      to the buffer.
  200 *
  201 * These write modes control how the current file position affects the
  202 * behavior of updating sysctl values through the proc interface on each write.
  203 */
  204enum sysctl_writes_mode {
  205        SYSCTL_WRITES_LEGACY            = -1,
  206        SYSCTL_WRITES_WARN              = 0,
  207        SYSCTL_WRITES_STRICT            = 1,
  208};
 209
 210static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 211
 212static int proc_do_cad_pid(struct ctl_table *table, int write,
 213                  void __user *buffer, size_t *lenp, loff_t *ppos);
 214static int proc_taint(struct ctl_table *table, int write,
 215                               void __user *buffer, size_t *lenp, loff_t *ppos);
 216#endif
 217
 218#ifdef CONFIG_PRINTK
 219static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 220                                void __user *buffer, size_t *lenp, loff_t *ppos);
 221#endif
 222
 223static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
 224                void __user *buffer, size_t *lenp, loff_t *ppos);
 225#ifdef CONFIG_COREDUMP
 226static int proc_dostring_coredump(struct ctl_table *table, int write,
 227                void __user *buffer, size_t *lenp, loff_t *ppos);
 228#endif
 229static int proc_dopipe_max_size(struct ctl_table *table, int write,
 230                void __user *buffer, size_t *lenp, loff_t *ppos);
 231#ifdef CONFIG_BPF_SYSCALL
 232static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
 233                                          void __user *buffer, size_t *lenp,
 234                                          loff_t *ppos);
 235#endif
 236
 237#ifdef CONFIG_MAGIC_SYSRQ
 238/* Note: sysrq code uses its own private copy */
 239static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 240
  241static int sysrq_sysctl_handler(struct ctl_table *table, int write,
  242                                void __user *buffer, size_t *lenp,
  243                                loff_t *ppos)
  244{       /* proc handler of the "sysrq" entry in kern_table, whose .data is &__sysrq_enabled */
  245        int error;
  246
  247        error = proc_dointvec(table, write, buffer, lenp, ppos);   /* delegate the actual read/write */
  248        if (error)
  249                return error;   /* propagate the failure; sysrq code is not notified */
  250
  251        if (write)      /* only a write triggers the call below */
  252                sysrq_toggle_support(__sysrq_enabled);  /* pass the value on; sysrq code keeps its own private copy */
  253
  254        return 0;
  255}
 256
 257#endif
 258
 259static struct ctl_table kern_table[];
 260static struct ctl_table vm_table[];
 261static struct ctl_table fs_table[];
 262static struct ctl_table debug_table[];
 263static struct ctl_table dev_table[];
 264extern struct ctl_table random_table[];
 265#ifdef CONFIG_EPOLL
 266extern struct ctl_table epoll_table[];
 267#endif
 268
 269#ifdef CONFIG_FW_LOADER_USER_HELPER
 270extern struct ctl_table firmware_config_table[];
 271#endif
 272
 273#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 274int sysctl_legacy_va_layout;
 275#endif
 276
 277/* The default sysctl tables: */
 278
  279static struct ctl_table sysctl_base_table[] = {      /* one entry per top-level name; each .child is a table forward-declared above */
  280        {
  281                .procname       = "kernel",
  282                .mode           = 0555,         /* octal: read + execute for owner/group/other, no write */
  283                .child          = kern_table,
  284        },
  285        {
  286                .procname       = "vm",
  287                .mode           = 0555,
  288                .child          = vm_table,
  289        },
  290        {
  291                .procname       = "fs",
  292                .mode           = 0555,
  293                .child          = fs_table,
  294        },
  295        {
  296                .procname       = "debug",
  297                .mode           = 0555,
  298                .child          = debug_table,
  299        },
  300        {
  301                .procname       = "dev",
  302                .mode           = 0555,
  303                .child          = dev_table,
  304        },
  305        { }     /* empty final entry; presumably the table terminator -- confirm against the ctl_table walker */
  306};
 307
 308#ifdef CONFIG_SCHED_DEBUG
 309static int min_sched_granularity_ns = 100000;           /* 100 usecs */
 310static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
 311static int min_wakeup_granularity_ns;                   /* 0 usecs */
 312static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
 313#ifdef CONFIG_SMP
 314static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 315static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
 316#endif /* CONFIG_SMP */
 317#endif /* CONFIG_SCHED_DEBUG */
 318
 319#ifdef CONFIG_COMPACTION
 320static int min_extfrag_threshold;
 321static int max_extfrag_threshold = 1000;
 322#endif
 323
 324static struct ctl_table kern_table[] = {
 325        {
 326                .procname       = "sched_child_runs_first",
 327                .data           = &sysctl_sched_child_runs_first,
 328                .maxlen         = sizeof(unsigned int),
 329                .mode           = 0644,
 330                .proc_handler   = proc_dointvec,
 331        },
 332#ifdef CONFIG_SCHED_DEBUG
 333        {
 334                .procname       = "sched_min_granularity_ns",
 335                .data           = &sysctl_sched_min_granularity,
 336                .maxlen         = sizeof(unsigned int),
 337                .mode           = 0644,
 338                .proc_handler   = sched_proc_update_handler,
 339                .extra1         = &min_sched_granularity_ns,
 340                .extra2         = &max_sched_granularity_ns,
 341        },
 342        {
 343                .procname       = "sched_latency_ns",
 344                .data           = &sysctl_sched_latency,
 345                .maxlen         = sizeof(unsigned int),
 346                .mode           = 0644,
 347                .proc_handler   = sched_proc_update_handler,
 348                .extra1         = &min_sched_granularity_ns,
 349                .extra2         = &max_sched_granularity_ns,
 350        },
 351        {
 352                .procname       = "sched_wakeup_granularity_ns",
 353                .data           = &sysctl_sched_wakeup_granularity,
 354                .maxlen         = sizeof(unsigned int),
 355                .mode           = 0644,
 356                .proc_handler   = sched_proc_update_handler,
 357                .extra1         = &min_wakeup_granularity_ns,
 358                .extra2         = &max_wakeup_granularity_ns,
 359        },
 360#ifdef CONFIG_SMP
 361        {
 362                .procname       = "sched_tunable_scaling",
 363                .data           = &sysctl_sched_tunable_scaling,
 364                .maxlen         = sizeof(enum sched_tunable_scaling),
 365                .mode           = 0644,
 366                .proc_handler   = sched_proc_update_handler,
 367                .extra1         = &min_sched_tunable_scaling,
 368                .extra2         = &max_sched_tunable_scaling,
 369        },
 370        {
 371                .procname       = "sched_migration_cost_ns",
 372                .data           = &sysctl_sched_migration_cost,
 373                .maxlen         = sizeof(unsigned int),
 374                .mode           = 0644,
 375                .proc_handler   = proc_dointvec,
 376        },
 377        {
 378                .procname       = "sched_nr_migrate",
 379                .data           = &sysctl_sched_nr_migrate,
 380                .maxlen         = sizeof(unsigned int),
 381                .mode           = 0644,
 382                .proc_handler   = proc_dointvec,
 383        },
 384#ifdef CONFIG_SCHEDSTATS
 385        {
 386                .procname       = "sched_schedstats",
 387                .data           = NULL,
 388                .maxlen         = sizeof(unsigned int),
 389                .mode           = 0644,
 390                .proc_handler   = sysctl_schedstats,
 391                .extra1         = &zero,
 392                .extra2         = &one,
 393        },
 394#endif /* CONFIG_SCHEDSTATS */
 395#endif /* CONFIG_SMP */
 396#ifdef CONFIG_NUMA_BALANCING
 397        {
 398                .procname       = "numa_balancing_scan_delay_ms",
 399                .data           = &sysctl_numa_balancing_scan_delay,
 400                .maxlen         = sizeof(unsigned int),
 401                .mode           = 0644,
 402                .proc_handler   = proc_dointvec,
 403        },
 404        {
 405                .procname       = "numa_balancing_scan_period_min_ms",
 406                .data           = &sysctl_numa_balancing_scan_period_min,
 407                .maxlen         = sizeof(unsigned int),
 408                .mode           = 0644,
 409                .proc_handler   = proc_dointvec,
 410        },
 411        {
 412                .procname       = "numa_balancing_scan_period_max_ms",
 413                .data           = &sysctl_numa_balancing_scan_period_max,
 414                .maxlen         = sizeof(unsigned int),
 415                .mode           = 0644,
 416                .proc_handler   = proc_dointvec,
 417        },
 418        {
 419                .procname       = "numa_balancing_scan_size_mb",
 420                .data           = &sysctl_numa_balancing_scan_size,
 421                .maxlen         = sizeof(unsigned int),
 422                .mode           = 0644,
 423                .proc_handler   = proc_dointvec_minmax,
 424                .extra1         = &one,
 425        },
 426        {
 427                .procname       = "numa_balancing",
 428                .data           = NULL, /* filled in by handler */
 429                .maxlen         = sizeof(unsigned int),
 430                .mode           = 0644,
 431                .proc_handler   = sysctl_numa_balancing,
 432                .extra1         = &zero,
 433                .extra2         = &one,
 434        },
 435#endif /* CONFIG_NUMA_BALANCING */
 436#endif /* CONFIG_SCHED_DEBUG */
 437        {
 438                .procname       = "sched_rt_period_us",
 439                .data           = &sysctl_sched_rt_period,
 440                .maxlen         = sizeof(unsigned int),
 441                .mode           = 0644,
 442                .proc_handler   = sched_rt_handler,
 443        },
 444        {
 445                .procname       = "sched_rt_runtime_us",
 446                .data           = &sysctl_sched_rt_runtime,
 447                .maxlen         = sizeof(int),
 448                .mode           = 0644,
 449                .proc_handler   = sched_rt_handler,
 450        },
 451        {
 452                .procname       = "sched_rr_timeslice_ms",
 453                .data           = &sysctl_sched_rr_timeslice,
 454                .maxlen         = sizeof(int),
 455                .mode           = 0644,
 456                .proc_handler   = sched_rr_handler,
 457        },
 458#ifdef CONFIG_SCHED_AUTOGROUP
 459        {
 460                .procname       = "sched_autogroup_enabled",
 461                .data           = &sysctl_sched_autogroup_enabled,
 462                .maxlen         = sizeof(unsigned int),
 463                .mode           = 0644,
 464                .proc_handler   = proc_dointvec_minmax,
 465                .extra1         = &zero,
 466                .extra2         = &one,
 467        },
 468#endif
 469#ifdef CONFIG_CFS_BANDWIDTH
 470        {
 471                .procname       = "sched_cfs_bandwidth_slice_us",
 472                .data           = &sysctl_sched_cfs_bandwidth_slice,
 473                .maxlen         = sizeof(unsigned int),
 474                .mode           = 0644,
 475                .proc_handler   = proc_dointvec_minmax,
 476                .extra1         = &one,
 477        },
 478#endif
 479#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 480        {
 481                .procname       = "sched_energy_aware",
 482                .data           = &sysctl_sched_energy_aware,
 483                .maxlen         = sizeof(unsigned int),
 484                .mode           = 0644,
 485                .proc_handler   = sched_energy_aware_handler,
 486                .extra1         = &zero,
 487                .extra2         = &one,
 488        },
 489#endif
 490#ifdef CONFIG_PROVE_LOCKING
 491        {
 492                .procname       = "prove_locking",
 493                .data           = &prove_locking,
 494                .maxlen         = sizeof(int),
 495                .mode           = 0644,
 496                .proc_handler   = proc_dointvec,
 497        },
 498#endif
 499#ifdef CONFIG_LOCK_STAT
 500        {
 501                .procname       = "lock_stat",
 502                .data           = &lock_stat,
 503                .maxlen         = sizeof(int),
 504                .mode           = 0644,
 505                .proc_handler   = proc_dointvec,
 506        },
 507#endif
 508        {
 509                .procname       = "panic",
 510                .data           = &panic_timeout,
 511                .maxlen         = sizeof(int),
 512                .mode           = 0644,
 513                .proc_handler   = proc_dointvec,
 514        },
 515#ifdef CONFIG_COREDUMP
 516        {
 517                .procname       = "core_uses_pid",
 518                .data           = &core_uses_pid,
 519                .maxlen         = sizeof(int),
 520                .mode           = 0644,
 521                .proc_handler   = proc_dointvec,
 522        },
 523        {
 524                .procname       = "core_pattern",
 525                .data           = core_pattern,
 526                .maxlen         = CORENAME_MAX_SIZE,
 527                .mode           = 0644,
 528                .proc_handler   = proc_dostring_coredump,
 529        },
 530        {
 531                .procname       = "core_pipe_limit",
 532                .data           = &core_pipe_limit,
 533                .maxlen         = sizeof(unsigned int),
 534                .mode           = 0644,
 535                .proc_handler   = proc_dointvec,
 536        },
 537#endif
 538#ifdef CONFIG_PROC_SYSCTL
 539        {
 540                .procname       = "tainted",
 541                .maxlen         = sizeof(long),
 542                .mode           = 0644,
 543                .proc_handler   = proc_taint,
 544        },
 545        {
 546                .procname       = "sysctl_writes_strict",
 547                .data           = &sysctl_writes_strict,
 548                .maxlen         = sizeof(int),
 549                .mode           = 0644,
 550                .proc_handler   = proc_dointvec_minmax,
 551                .extra1         = &neg_one,
 552                .extra2         = &one,
 553        },
 554#endif
 555#ifdef CONFIG_LATENCYTOP
 556        {
 557                .procname       = "latencytop",
 558                .data           = &latencytop_enabled,
 559                .maxlen         = sizeof(int),
 560                .mode           = 0644,
 561                .proc_handler   = sysctl_latencytop,
 562        },
 563#endif
 564#ifdef CONFIG_BLK_DEV_INITRD
 565        {
 566                .procname       = "real-root-dev",
 567                .data           = &real_root_dev,
 568                .maxlen         = sizeof(int),
 569                .mode           = 0644,
 570                .proc_handler   = proc_dointvec,
 571        },
 572#endif
 573        {
 574                .procname       = "print-fatal-signals",
 575                .data           = &print_fatal_signals,
 576                .maxlen         = sizeof(int),
 577                .mode           = 0644,
 578                .proc_handler   = proc_dointvec,
 579        },
 580#ifdef CONFIG_SPARC
 581        {
 582                .procname       = "reboot-cmd",
 583                .data           = reboot_command,
 584                .maxlen         = 256,
 585                .mode           = 0644,
 586                .proc_handler   = proc_dostring,
 587        },
 588        {
 589                .procname       = "stop-a",
 590                .data           = &stop_a_enabled,
 591                .maxlen         = sizeof (int),
 592                .mode           = 0644,
 593                .proc_handler   = proc_dointvec,
 594        },
 595        {
 596                .procname       = "scons-poweroff",
 597                .data           = &scons_pwroff,
 598                .maxlen         = sizeof (int),
 599                .mode           = 0644,
 600                .proc_handler   = proc_dointvec,
 601        },
 602#endif
 603#ifdef CONFIG_SPARC64
 604        {
 605                .procname       = "tsb-ratio",
 606                .data           = &sysctl_tsb_ratio,
 607                .maxlen         = sizeof (int),
 608                .mode           = 0644,
 609                .proc_handler   = proc_dointvec,
 610        },
 611#endif
 612#ifdef __hppa__
 613        {
 614                .procname       = "soft-power",
 615                .data           = &pwrsw_enabled,
 616                .maxlen         = sizeof (int),
 617                .mode           = 0644,
 618                .proc_handler   = proc_dointvec,
 619        },
 620#endif
 621#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 622        {
 623                .procname       = "unaligned-trap",
 624                .data           = &unaligned_enabled,
 625                .maxlen         = sizeof (int),
 626                .mode           = 0644,
 627                .proc_handler   = proc_dointvec,
 628        },
 629#endif
 630        {
 631                .procname       = "ctrl-alt-del",
 632                .data           = &C_A_D,
 633                .maxlen         = sizeof(int),
 634                .mode           = 0644,
 635                .proc_handler   = proc_dointvec,
 636        },
 637#ifdef CONFIG_FUNCTION_TRACER
 638        {
 639                .procname       = "ftrace_enabled",
 640                .data           = &ftrace_enabled,
 641                .maxlen         = sizeof(int),
 642                .mode           = 0644,
 643                .proc_handler   = ftrace_enable_sysctl,
 644        },
 645#endif
 646#ifdef CONFIG_STACK_TRACER
 647        {
 648                .procname       = "stack_tracer_enabled",
 649                .data           = &stack_tracer_enabled,
 650                .maxlen         = sizeof(int),
 651                .mode           = 0644,
 652                .proc_handler   = stack_trace_sysctl,
 653        },
 654#endif
 655#ifdef CONFIG_TRACING
 656        {
 657                .procname       = "ftrace_dump_on_oops",
 658                .data           = &ftrace_dump_on_oops,
 659                .maxlen         = sizeof(int),
 660                .mode           = 0644,
 661                .proc_handler   = proc_dointvec,
 662        },
 663        {
 664                .procname       = "traceoff_on_warning",
 665                .data           = &__disable_trace_on_warning,
 666                .maxlen         = sizeof(__disable_trace_on_warning),
 667                .mode           = 0644,
 668                .proc_handler   = proc_dointvec,
 669        },
 670        {
 671                .procname       = "tracepoint_printk",
 672                .data           = &tracepoint_printk,
 673                .maxlen         = sizeof(tracepoint_printk),
 674                .mode           = 0644,
 675                .proc_handler   = tracepoint_printk_sysctl,
 676        },
 677#endif
 678#ifdef CONFIG_KEXEC_CORE
 679        {
 680                .procname       = "kexec_load_disabled",
 681                .data           = &kexec_load_disabled,
 682                .maxlen         = sizeof(int),
 683                .mode           = 0644,
 684                /* only handle a transition from default "0" to "1" */
 685                .proc_handler   = proc_dointvec_minmax,
 686                .extra1         = &one,
 687                .extra2         = &one,
 688        },
 689#endif
 690#ifdef CONFIG_MODULES
 691        {
 692                .procname       = "modprobe",
 693                .data           = &modprobe_path,
 694                .maxlen         = KMOD_PATH_LEN,
 695                .mode           = 0644,
 696                .proc_handler   = proc_dostring,
 697        },
 698        {
 699                .procname       = "modules_disabled",
 700                .data           = &modules_disabled,
 701                .maxlen         = sizeof(int),
 702                .mode           = 0644,
 703                /* only handle a transition from default "0" to "1" */
 704                .proc_handler   = proc_dointvec_minmax,
 705                .extra1         = &one,
 706                .extra2         = &one,
 707        },
 708#endif
 709#ifdef CONFIG_UEVENT_HELPER
 710        {
 711                .procname       = "hotplug",
 712                .data           = &uevent_helper,
 713                .maxlen         = UEVENT_HELPER_PATH_LEN,
 714                .mode           = 0644,
 715                .proc_handler   = proc_dostring,
 716        },
 717#endif
 718#ifdef CONFIG_CHR_DEV_SG
 719        {
 720                .procname       = "sg-big-buff",
 721                .data           = &sg_big_buff,
 722                .maxlen         = sizeof (int),
 723                .mode           = 0444,
 724                .proc_handler   = proc_dointvec,
 725        },
 726#endif
 727#ifdef CONFIG_BSD_PROCESS_ACCT
 728        {
 729                .procname       = "acct",
 730                .data           = &acct_parm,
 731                .maxlen         = 3*sizeof(int),
 732                .mode           = 0644,
 733                .proc_handler   = proc_dointvec,
 734        },
 735#endif
 736#ifdef CONFIG_MAGIC_SYSRQ
 737        {
 738                .procname       = "sysrq",
 739                .data           = &__sysrq_enabled,
 740                .maxlen         = sizeof (int),
 741                .mode           = 0644,
 742                .proc_handler   = sysrq_sysctl_handler,
 743        },
 744#endif
 745#ifdef CONFIG_PROC_SYSCTL
 746        {
 747                .procname       = "cad_pid",
 748                .data           = NULL,
 749                .maxlen         = sizeof (int),
 750                .mode           = 0600,
 751                .proc_handler   = proc_do_cad_pid,
 752        },
 753#endif
 754        {
 755                .procname       = "threads-max",
 756                .data           = NULL,
 757                .maxlen         = sizeof(int),
 758                .mode           = 0644,
 759                .proc_handler   = sysctl_max_threads,
 760        },
 761        {
 762                .procname       = "random",
 763                .mode           = 0555,
 764                .child          = random_table,
 765        },
 766        {
 767                .procname       = "usermodehelper",
 768                .mode           = 0555,
 769                .child          = usermodehelper_table,
 770        },
 771#ifdef CONFIG_FW_LOADER_USER_HELPER
 772        {
 773                .procname       = "firmware_config",
 774                .mode           = 0555,
 775                .child          = firmware_config_table,
 776        },
 777#endif
 778        {
 779                .procname       = "overflowuid",
 780                .data           = &overflowuid,
 781                .maxlen         = sizeof(int),
 782                .mode           = 0644,
 783                .proc_handler   = proc_dointvec_minmax,
 784                .extra1         = &minolduid,
 785                .extra2         = &maxolduid,
 786        },
 787        {
 788                .procname       = "overflowgid",
 789                .data           = &overflowgid,
 790                .maxlen         = sizeof(int),
 791                .mode           = 0644,
 792                .proc_handler   = proc_dointvec_minmax,
 793                .extra1         = &minolduid,
 794                .extra2         = &maxolduid,
 795        },
 796#ifdef CONFIG_S390
 797#ifdef CONFIG_MATHEMU
 798        {
 799                .procname       = "ieee_emulation_warnings",
 800                .data           = &sysctl_ieee_emulation_warnings,
 801                .maxlen         = sizeof(int),
 802                .mode           = 0644,
 803                .proc_handler   = proc_dointvec,
 804        },
 805#endif
 806        {
 807                .procname       = "userprocess_debug",
 808                .data           = &show_unhandled_signals,
 809                .maxlen         = sizeof(int),
 810                .mode           = 0644,
 811                .proc_handler   = proc_dointvec,
 812        },
 813#endif
 814        {
 815                .procname       = "pid_max",
 816                .data           = &pid_max,
 817                .maxlen         = sizeof (int),
 818                .mode           = 0644,
 819                .proc_handler   = proc_dointvec_minmax,
 820                .extra1         = &pid_max_min,
 821                .extra2         = &pid_max_max,
 822        },
 823        {
 824                .procname       = "panic_on_oops",
 825                .data           = &panic_on_oops,
 826                .maxlen         = sizeof(int),
 827                .mode           = 0644,
 828                .proc_handler   = proc_dointvec,
 829        },
 830        {
 831                .procname       = "panic_print",
 832                .data           = &panic_print,
 833                .maxlen         = sizeof(unsigned long),
 834                .mode           = 0644,
 835                .proc_handler   = proc_doulongvec_minmax,
 836        },
 837#if defined CONFIG_PRINTK
 838        {
 839                .procname       = "printk",
 840                .data           = &console_loglevel,
 841                .maxlen         = 4*sizeof(int),
 842                .mode           = 0644,
 843                .proc_handler   = proc_dointvec,
 844        },
 845        {
 846                .procname       = "printk_ratelimit",
 847                .data           = &printk_ratelimit_state.interval,
 848                .maxlen         = sizeof(int),
 849                .mode           = 0644,
 850                .proc_handler   = proc_dointvec_jiffies,
 851        },
 852        {
 853                .procname       = "printk_ratelimit_burst",
 854                .data           = &printk_ratelimit_state.burst,
 855                .maxlen         = sizeof(int),
 856                .mode           = 0644,
 857                .proc_handler   = proc_dointvec,
 858        },
 859        {
 860                .procname       = "printk_delay",
 861                .data           = &printk_delay_msec,
 862                .maxlen         = sizeof(int),
 863                .mode           = 0644,
 864                .proc_handler   = proc_dointvec_minmax,
 865                .extra1         = &zero,
 866                .extra2         = &ten_thousand,
 867        },
 868        {
 869                .procname       = "printk_devkmsg",
 870                .data           = devkmsg_log_str,
 871                .maxlen         = DEVKMSG_STR_MAX_SIZE,
 872                .mode           = 0644,
 873                .proc_handler   = devkmsg_sysctl_set_loglvl,
 874        },
 875        {
 876                .procname       = "dmesg_restrict",
 877                .data           = &dmesg_restrict,
 878                .maxlen         = sizeof(int),
 879                .mode           = 0644,
 880                .proc_handler   = proc_dointvec_minmax_sysadmin,
 881                .extra1         = &zero,
 882                .extra2         = &one,
 883        },
 884        {
 885                .procname       = "kptr_restrict",
 886                .data           = &kptr_restrict,
 887                .maxlen         = sizeof(int),
 888                .mode           = 0644,
 889                .proc_handler   = proc_dointvec_minmax_sysadmin,
 890                .extra1         = &zero,
 891                .extra2         = &two,
 892        },
 893#endif
 894        {
 895                .procname       = "ngroups_max",
 896                .data           = &ngroups_max,
 897                .maxlen         = sizeof (int),
 898                .mode           = 0444,
 899                .proc_handler   = proc_dointvec,
 900        },
 901        {
 902                .procname       = "cap_last_cap",
 903                .data           = (void *)&cap_last_cap,
 904                .maxlen         = sizeof(int),
 905                .mode           = 0444,
 906                .proc_handler   = proc_dointvec,
 907        },
 908#if defined(CONFIG_LOCKUP_DETECTOR)
 909        {
 910                .procname       = "watchdog",
 911                .data           = &watchdog_user_enabled,
 912                .maxlen         = sizeof(int),
 913                .mode           = 0644,
 914                .proc_handler   = proc_watchdog,
 915                .extra1         = &zero,
 916                .extra2         = &one,
 917        },
 918        {
 919                .procname       = "watchdog_thresh",
 920                .data           = &watchdog_thresh,
 921                .maxlen         = sizeof(int),
 922                .mode           = 0644,
 923                .proc_handler   = proc_watchdog_thresh,
 924                .extra1         = &zero,
 925                .extra2         = &sixty,
 926        },
 927        {
 928                .procname       = "nmi_watchdog",
 929                .data           = &nmi_watchdog_user_enabled,
 930                .maxlen         = sizeof(int),
 931                .mode           = NMI_WATCHDOG_SYSCTL_PERM,
 932                .proc_handler   = proc_nmi_watchdog,
 933                .extra1         = &zero,
 934                .extra2         = &one,
 935        },
 936        {
 937                .procname       = "watchdog_cpumask",
 938                .data           = &watchdog_cpumask_bits,
 939                .maxlen         = NR_CPUS,
 940                .mode           = 0644,
 941                .proc_handler   = proc_watchdog_cpumask,
 942        },
 943#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 944        {
 945                .procname       = "soft_watchdog",
 946                .data           = &soft_watchdog_user_enabled,
 947                .maxlen         = sizeof(int),
 948                .mode           = 0644,
 949                .proc_handler   = proc_soft_watchdog,
 950                .extra1         = &zero,
 951                .extra2         = &one,
 952        },
 953        {
 954                .procname       = "softlockup_panic",
 955                .data           = &softlockup_panic,
 956                .maxlen         = sizeof(int),
 957                .mode           = 0644,
 958                .proc_handler   = proc_dointvec_minmax,
 959                .extra1         = &zero,
 960                .extra2         = &one,
 961        },
 962#ifdef CONFIG_SMP
 963        {
 964                .procname       = "softlockup_all_cpu_backtrace",
 965                .data           = &sysctl_softlockup_all_cpu_backtrace,
 966                .maxlen         = sizeof(int),
 967                .mode           = 0644,
 968                .proc_handler   = proc_dointvec_minmax,
 969                .extra1         = &zero,
 970                .extra2         = &one,
 971        },
 972#endif /* CONFIG_SMP */
 973#endif
 974#ifdef CONFIG_HARDLOCKUP_DETECTOR
 975        {
 976                .procname       = "hardlockup_panic",
 977                .data           = &hardlockup_panic,
 978                .maxlen         = sizeof(int),
 979                .mode           = 0644,
 980                .proc_handler   = proc_dointvec_minmax,
 981                .extra1         = &zero,
 982                .extra2         = &one,
 983        },
 984#ifdef CONFIG_SMP
 985        {
 986                .procname       = "hardlockup_all_cpu_backtrace",
 987                .data           = &sysctl_hardlockup_all_cpu_backtrace,
 988                .maxlen         = sizeof(int),
 989                .mode           = 0644,
 990                .proc_handler   = proc_dointvec_minmax,
 991                .extra1         = &zero,
 992                .extra2         = &one,
 993        },
 994#endif /* CONFIG_SMP */
 995#endif
 996#endif
 997
 998#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 999        {
1000                .procname       = "unknown_nmi_panic",
1001                .data           = &unknown_nmi_panic,
1002                .maxlen         = sizeof (int),
1003                .mode           = 0644,
1004                .proc_handler   = proc_dointvec,
1005        },
1006#endif
1007#if defined(CONFIG_X86)
1008        {
1009                .procname       = "panic_on_unrecovered_nmi",
1010                .data           = &panic_on_unrecovered_nmi,
1011                .maxlen         = sizeof(int),
1012                .mode           = 0644,
1013                .proc_handler   = proc_dointvec,
1014        },
1015        {
1016                .procname       = "panic_on_io_nmi",
1017                .data           = &panic_on_io_nmi,
1018                .maxlen         = sizeof(int),
1019                .mode           = 0644,
1020                .proc_handler   = proc_dointvec,
1021        },
1022#ifdef CONFIG_DEBUG_STACKOVERFLOW
1023        {
1024                .procname       = "panic_on_stackoverflow",
1025                .data           = &sysctl_panic_on_stackoverflow,
1026                .maxlen         = sizeof(int),
1027                .mode           = 0644,
1028                .proc_handler   = proc_dointvec,
1029        },
1030#endif
1031        {
1032                .procname       = "bootloader_type",
1033                .data           = &bootloader_type,
1034                .maxlen         = sizeof (int),
1035                .mode           = 0444,
1036                .proc_handler   = proc_dointvec,
1037        },
1038        {
1039                .procname       = "bootloader_version",
1040                .data           = &bootloader_version,
1041                .maxlen         = sizeof (int),
1042                .mode           = 0444,
1043                .proc_handler   = proc_dointvec,
1044        },
1045        {
1046                .procname       = "io_delay_type",
1047                .data           = &io_delay_type,
1048                .maxlen         = sizeof(int),
1049                .mode           = 0644,
1050                .proc_handler   = proc_dointvec,
1051        },
1052#endif
1053#if defined(CONFIG_MMU)
1054        {
1055                .procname       = "randomize_va_space",
1056                .data           = &randomize_va_space,
1057                .maxlen         = sizeof(int),
1058                .mode           = 0644,
1059                .proc_handler   = proc_dointvec,
1060        },
1061#endif
1062#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1063        {
1064                .procname       = "spin_retry",
1065                .data           = &spin_retry,
1066                .maxlen         = sizeof (int),
1067                .mode           = 0644,
1068                .proc_handler   = proc_dointvec,
1069        },
1070#endif
1071#if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1072        {
1073                .procname       = "acpi_video_flags",
1074                .data           = &acpi_realmode_flags,
1075                .maxlen         = sizeof (unsigned long),
1076                .mode           = 0644,
1077                .proc_handler   = proc_doulongvec_minmax,
1078        },
1079#endif
1080#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1081        {
1082                .procname       = "ignore-unaligned-usertrap",
1083                .data           = &no_unaligned_warning,
1084                .maxlen         = sizeof (int),
1085                .mode           = 0644,
1086                .proc_handler   = proc_dointvec,
1087        },
1088#endif
1089#ifdef CONFIG_IA64
1090        {
1091                .procname       = "unaligned-dump-stack",
1092                .data           = &unaligned_dump_stack,
1093                .maxlen         = sizeof (int),
1094                .mode           = 0644,
1095                .proc_handler   = proc_dointvec,
1096        },
1097#endif
1098#ifdef CONFIG_DETECT_HUNG_TASK
1099        {
1100                .procname       = "hung_task_panic",
1101                .data           = &sysctl_hung_task_panic,
1102                .maxlen         = sizeof(int),
1103                .mode           = 0644,
1104                .proc_handler   = proc_dointvec_minmax,
1105                .extra1         = &zero,
1106                .extra2         = &one,
1107        },
1108        {
1109                .procname       = "hung_task_check_count",
1110                .data           = &sysctl_hung_task_check_count,
1111                .maxlen         = sizeof(int),
1112                .mode           = 0644,
1113                .proc_handler   = proc_dointvec_minmax,
1114                .extra1         = &zero,
1115        },
1116        {
1117                .procname       = "hung_task_timeout_secs",
1118                .data           = &sysctl_hung_task_timeout_secs,
1119                .maxlen         = sizeof(unsigned long),
1120                .mode           = 0644,
1121                .proc_handler   = proc_dohung_task_timeout_secs,
1122                .extra2         = &hung_task_timeout_max,
1123        },
1124        {
1125                .procname       = "hung_task_check_interval_secs",
1126                .data           = &sysctl_hung_task_check_interval_secs,
1127                .maxlen         = sizeof(unsigned long),
1128                .mode           = 0644,
1129                .proc_handler   = proc_dohung_task_timeout_secs,
1130                .extra2         = &hung_task_timeout_max,
1131        },
1132        {
1133                .procname       = "hung_task_warnings",
1134                .data           = &sysctl_hung_task_warnings,
1135                .maxlen         = sizeof(int),
1136                .mode           = 0644,
1137                .proc_handler   = proc_dointvec_minmax,
1138                .extra1         = &neg_one,
1139        },
1140#endif
1141#ifdef CONFIG_RT_MUTEXES
1142        {
1143                .procname       = "max_lock_depth",
1144                .data           = &max_lock_depth,
1145                .maxlen         = sizeof(int),
1146                .mode           = 0644,
1147                .proc_handler   = proc_dointvec,
1148        },
1149#endif
1150        {
1151                .procname       = "poweroff_cmd",
1152                .data           = &poweroff_cmd,
1153                .maxlen         = POWEROFF_CMD_PATH_LEN,
1154                .mode           = 0644,
1155                .proc_handler   = proc_dostring,
1156        },
1157#ifdef CONFIG_KEYS
1158        {
1159                .procname       = "keys",
1160                .mode           = 0555,
1161                .child          = key_sysctls,
1162        },
1163#endif
1164#ifdef CONFIG_PERF_EVENTS
1165        /*
1166         * User-space scripts rely on the existence of this file
1167         * as a feature check for perf_events being enabled.
1168         *
1169         * So it's an ABI, do not remove!
1170         */
1171        {
1172                .procname       = "perf_event_paranoid",
1173                .data           = &sysctl_perf_event_paranoid,
1174                .maxlen         = sizeof(sysctl_perf_event_paranoid),
1175                .mode           = 0644,
1176                .proc_handler   = proc_dointvec,
1177        },
1178        {
1179                .procname       = "perf_event_mlock_kb",
1180                .data           = &sysctl_perf_event_mlock,
1181                .maxlen         = sizeof(sysctl_perf_event_mlock),
1182                .mode           = 0644,
1183                .proc_handler   = proc_dointvec,
1184        },
1185        {
1186                .procname       = "perf_event_max_sample_rate",
1187                .data           = &sysctl_perf_event_sample_rate,
1188                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1189                .mode           = 0644,
1190                .proc_handler   = perf_proc_update_handler,
1191                .extra1         = &one,
1192        },
1193        {
1194                .procname       = "perf_cpu_time_max_percent",
1195                .data           = &sysctl_perf_cpu_time_max_percent,
1196                .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1197                .mode           = 0644,
1198                .proc_handler   = perf_cpu_time_max_percent_handler,
1199                .extra1         = &zero,
1200                .extra2         = &one_hundred,
1201        },
1202        {
1203                .procname       = "perf_event_max_stack",
1204                .data           = &sysctl_perf_event_max_stack,
1205                .maxlen         = sizeof(sysctl_perf_event_max_stack),
1206                .mode           = 0644,
1207                .proc_handler   = perf_event_max_stack_handler,
1208                .extra1         = &zero,
1209                .extra2         = &six_hundred_forty_kb,
1210        },
1211        {
1212                .procname       = "perf_event_max_contexts_per_stack",
1213                .data           = &sysctl_perf_event_max_contexts_per_stack,
1214                .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1215                .mode           = 0644,
1216                .proc_handler   = perf_event_max_stack_handler,
1217                .extra1         = &zero,
1218                .extra2         = &one_thousand,
1219        },
1220#endif
1221        {
1222                .procname       = "panic_on_warn",
1223                .data           = &panic_on_warn,
1224                .maxlen         = sizeof(int),
1225                .mode           = 0644,
1226                .proc_handler   = proc_dointvec_minmax,
1227                .extra1         = &zero,
1228                .extra2         = &one,
1229        },
1230#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1231        {
1232                .procname       = "timer_migration",
1233                .data           = &sysctl_timer_migration,
1234                .maxlen         = sizeof(unsigned int),
1235                .mode           = 0644,
1236                .proc_handler   = timer_migration_handler,
1237                .extra1         = &zero,
1238                .extra2         = &one,
1239        },
1240#endif
1241#ifdef CONFIG_BPF_SYSCALL
1242        {
1243                .procname       = "unprivileged_bpf_disabled",
1244                .data           = &sysctl_unprivileged_bpf_disabled,
1245                .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1246                .mode           = 0644,
1247                /* only handle a transition from default "0" to "1" */
1248                .proc_handler   = proc_dointvec_minmax,
1249                .extra1         = &one,
1250                .extra2         = &one,
1251        },
1252        {
1253                .procname       = "bpf_stats_enabled",
1254                .data           = &sysctl_bpf_stats_enabled,
1255                .maxlen         = sizeof(sysctl_bpf_stats_enabled),
1256                .mode           = 0644,
1257                .proc_handler   = proc_dointvec_minmax_bpf_stats,
1258                .extra1         = &zero,
1259                .extra2         = &one,
1260        },
1261#endif
1262#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1263        {
1264                .procname       = "panic_on_rcu_stall",
1265                .data           = &sysctl_panic_on_rcu_stall,
1266                .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1267                .mode           = 0644,
1268                .proc_handler   = proc_dointvec_minmax,
1269                .extra1         = &zero,
1270                .extra2         = &one,
1271        },
1272#endif
1273#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1274        {
1275                .procname       = "stack_erasing",
1276                .data           = NULL,
1277                .maxlen         = sizeof(int),
1278                .mode           = 0600,
1279                .proc_handler   = stack_erasing_sysctl,
1280                .extra1         = &zero,
1281                .extra2         = &one,
1282        },
1283#endif
1284        { }
1285};
1286
1287static struct ctl_table vm_table[] = {
1288        {
1289                .procname       = "overcommit_memory",
1290                .data           = &sysctl_overcommit_memory,
1291                .maxlen         = sizeof(sysctl_overcommit_memory),
1292                .mode           = 0644,
1293                .proc_handler   = proc_dointvec_minmax,
1294                .extra1         = &zero,
1295                .extra2         = &two,
1296        },
1297        {
1298                .procname       = "panic_on_oom",
1299                .data           = &sysctl_panic_on_oom,
1300                .maxlen         = sizeof(sysctl_panic_on_oom),
1301                .mode           = 0644,
1302                .proc_handler   = proc_dointvec_minmax,
1303                .extra1         = &zero,
1304                .extra2         = &two,
1305        },
1306        {
1307                .procname       = "oom_kill_allocating_task",
1308                .data           = &sysctl_oom_kill_allocating_task,
1309                .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1310                .mode           = 0644,
1311                .proc_handler   = proc_dointvec,
1312        },
1313        {
1314                .procname       = "oom_dump_tasks",
1315                .data           = &sysctl_oom_dump_tasks,
1316                .maxlen         = sizeof(sysctl_oom_dump_tasks),
1317                .mode           = 0644,
1318                .proc_handler   = proc_dointvec,
1319        },
1320        {
1321                .procname       = "overcommit_ratio",
1322                .data           = &sysctl_overcommit_ratio,
1323                .maxlen         = sizeof(sysctl_overcommit_ratio),
1324                .mode           = 0644,
1325                .proc_handler   = overcommit_ratio_handler,
1326        },
1327        {
1328                .procname       = "overcommit_kbytes",
1329                .data           = &sysctl_overcommit_kbytes,
1330                .maxlen         = sizeof(sysctl_overcommit_kbytes),
1331                .mode           = 0644,
1332                .proc_handler   = overcommit_kbytes_handler,
1333        },
1334        {
1335                .procname       = "page-cluster", 
1336                .data           = &page_cluster,
1337                .maxlen         = sizeof(int),
1338                .mode           = 0644,
1339                .proc_handler   = proc_dointvec_minmax,
1340                .extra1         = &zero,
1341        },
1342        {
1343                .procname       = "dirty_background_ratio",
1344                .data           = &dirty_background_ratio,
1345                .maxlen         = sizeof(dirty_background_ratio),
1346                .mode           = 0644,
1347                .proc_handler   = dirty_background_ratio_handler,
1348                .extra1         = &zero,
1349                .extra2         = &one_hundred,
1350        },
1351        {
1352                .procname       = "dirty_background_bytes",
1353                .data           = &dirty_background_bytes,
1354                .maxlen         = sizeof(dirty_background_bytes),
1355                .mode           = 0644,
1356                .proc_handler   = dirty_background_bytes_handler,
1357                .extra1         = &one_ul,
1358        },
1359        {
1360                .procname       = "dirty_ratio",
1361                .data           = &vm_dirty_ratio,
1362                .maxlen         = sizeof(vm_dirty_ratio),
1363                .mode           = 0644,
1364                .proc_handler   = dirty_ratio_handler,
1365                .extra1         = &zero,
1366                .extra2         = &one_hundred,
1367        },
1368        {
1369                .procname       = "dirty_bytes",
1370                .data           = &vm_dirty_bytes,
1371                .maxlen         = sizeof(vm_dirty_bytes),
1372                .mode           = 0644,
1373                .proc_handler   = dirty_bytes_handler,
1374                .extra1         = &dirty_bytes_min,
1375        },
1376        {
1377                .procname       = "dirty_writeback_centisecs",
1378                .data           = &dirty_writeback_interval,
1379                .maxlen         = sizeof(dirty_writeback_interval),
1380                .mode           = 0644,
1381                .proc_handler   = dirty_writeback_centisecs_handler,
1382        },
1383        {
1384                .procname       = "dirty_expire_centisecs",
1385                .data           = &dirty_expire_interval,
1386                .maxlen         = sizeof(dirty_expire_interval),
1387                .mode           = 0644,
1388                .proc_handler   = proc_dointvec_minmax,
1389                .extra1         = &zero,
1390        },
1391        {
1392                .procname       = "dirtytime_expire_seconds",
1393                .data           = &dirtytime_expire_interval,
1394                .maxlen         = sizeof(dirtytime_expire_interval),
1395                .mode           = 0644,
1396                .proc_handler   = dirtytime_interval_handler,
1397                .extra1         = &zero,
1398        },
1399        {
1400                .procname       = "swappiness",
1401                .data           = &vm_swappiness,
1402                .maxlen         = sizeof(vm_swappiness),
1403                .mode           = 0644,
1404                .proc_handler   = proc_dointvec_minmax,
1405                .extra1         = &zero,
1406                .extra2         = &one_hundred,
1407        },
1408#ifdef CONFIG_HUGETLB_PAGE
1409        {
1410                .procname       = "nr_hugepages",
1411                .data           = NULL,
1412                .maxlen         = sizeof(unsigned long),
1413                .mode           = 0644,
1414                .proc_handler   = hugetlb_sysctl_handler,
1415        },
1416#ifdef CONFIG_NUMA
1417        {
1418                .procname       = "nr_hugepages_mempolicy",
1419                .data           = NULL,
1420                .maxlen         = sizeof(unsigned long),
1421                .mode           = 0644,
1422                .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1423        },
1424        {
1425                .procname               = "numa_stat",
1426                .data                   = &sysctl_vm_numa_stat,
1427                .maxlen                 = sizeof(int),
1428                .mode                   = 0644,
1429                .proc_handler   = sysctl_vm_numa_stat_handler,
1430                .extra1                 = &zero,
1431                .extra2                 = &one,
1432        },
1433#endif
1434         {
1435                .procname       = "hugetlb_shm_group",
1436                .data           = &sysctl_hugetlb_shm_group,
1437                .maxlen         = sizeof(gid_t),
1438                .mode           = 0644,
1439                .proc_handler   = proc_dointvec,
1440         },
1441        {
1442                .procname       = "nr_overcommit_hugepages",
1443                .data           = NULL,
1444                .maxlen         = sizeof(unsigned long),
1445                .mode           = 0644,
1446                .proc_handler   = hugetlb_overcommit_handler,
1447        },
1448#endif
1449        {
1450                .procname       = "lowmem_reserve_ratio",
1451                .data           = &sysctl_lowmem_reserve_ratio,
1452                .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1453                .mode           = 0644,
1454                .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1455        },
1456        {
1457                .procname       = "drop_caches",
1458                .data           = &sysctl_drop_caches,
1459                .maxlen         = sizeof(int),
1460                .mode           = 0644,
1461                .proc_handler   = drop_caches_sysctl_handler,
1462                .extra1         = &one,
1463                .extra2         = &four,
1464        },
1465#ifdef CONFIG_COMPACTION
1466        {
1467                .procname       = "compact_memory",
1468                .data           = &sysctl_compact_memory,
1469                .maxlen         = sizeof(int),
1470                .mode           = 0200,
1471                .proc_handler   = sysctl_compaction_handler,
1472        },
1473        {
1474                .procname       = "extfrag_threshold",
1475                .data           = &sysctl_extfrag_threshold,
1476                .maxlen         = sizeof(int),
1477                .mode           = 0644,
1478                .proc_handler   = proc_dointvec_minmax,
1479                .extra1         = &min_extfrag_threshold,
1480                .extra2         = &max_extfrag_threshold,
1481        },
1482        {
1483                .procname       = "compact_unevictable_allowed",
1484                .data           = &sysctl_compact_unevictable_allowed,
1485                .maxlen         = sizeof(int),
1486                .mode           = 0644,
1487                .proc_handler   = proc_dointvec,
1488                .extra1         = &zero,
1489                .extra2         = &one,
1490        },
1491
1492#endif /* CONFIG_COMPACTION */
1493        {
1494                .procname       = "min_free_kbytes",
1495                .data           = &min_free_kbytes,
1496                .maxlen         = sizeof(min_free_kbytes),
1497                .mode           = 0644,
1498                .proc_handler   = min_free_kbytes_sysctl_handler,
1499                .extra1         = &zero,
1500        },
1501        {
1502                .procname       = "watermark_boost_factor",
1503                .data           = &watermark_boost_factor,
1504                .maxlen         = sizeof(watermark_boost_factor),
1505                .mode           = 0644,
1506                .proc_handler   = watermark_boost_factor_sysctl_handler,
1507                .extra1         = &zero,
1508        },
1509        {
1510                .procname       = "watermark_scale_factor",
1511                .data           = &watermark_scale_factor,
1512                .maxlen         = sizeof(watermark_scale_factor),
1513                .mode           = 0644,
1514                .proc_handler   = watermark_scale_factor_sysctl_handler,
1515                .extra1         = &one,
1516                .extra2         = &one_thousand,
1517        },
1518        {
1519                .procname       = "percpu_pagelist_fraction",
1520                .data           = &percpu_pagelist_fraction,
1521                .maxlen         = sizeof(percpu_pagelist_fraction),
1522                .mode           = 0644,
1523                .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1524                .extra1         = &zero,
1525        },
1526#ifdef CONFIG_MMU
1527        {
1528                .procname       = "max_map_count",
1529                .data           = &sysctl_max_map_count,
1530                .maxlen         = sizeof(sysctl_max_map_count),
1531                .mode           = 0644,
1532                .proc_handler   = proc_dointvec_minmax,
1533                .extra1         = &zero,
1534        },
1535#else
1536        {
1537                .procname       = "nr_trim_pages",
1538                .data           = &sysctl_nr_trim_pages,
1539                .maxlen         = sizeof(sysctl_nr_trim_pages),
1540                .mode           = 0644,
1541                .proc_handler   = proc_dointvec_minmax,
1542                .extra1         = &zero,
1543        },
1544#endif
1545        {
1546                .procname       = "laptop_mode",
1547                .data           = &laptop_mode,
1548                .maxlen         = sizeof(laptop_mode),
1549                .mode           = 0644,
1550                .proc_handler   = proc_dointvec_jiffies,
1551        },
1552        {
1553                .procname       = "block_dump",
1554                .data           = &block_dump,
1555                .maxlen         = sizeof(block_dump),
1556                .mode           = 0644,
1557                .proc_handler   = proc_dointvec,
1558                .extra1         = &zero,
1559        },
1560        {
1561                .procname       = "vfs_cache_pressure",
1562                .data           = &sysctl_vfs_cache_pressure,
1563                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1564                .mode           = 0644,
1565                .proc_handler   = proc_dointvec,
1566                .extra1         = &zero,
1567        },
1568#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1569        {
1570                .procname       = "legacy_va_layout",
1571                .data           = &sysctl_legacy_va_layout,
1572                .maxlen         = sizeof(sysctl_legacy_va_layout),
1573                .mode           = 0644,
1574                .proc_handler   = proc_dointvec,
1575                .extra1         = &zero,
1576        },
1577#endif
1578#ifdef CONFIG_NUMA
1579        {
1580                .procname       = "zone_reclaim_mode",
1581                .data           = &node_reclaim_mode,
1582                .maxlen         = sizeof(node_reclaim_mode),
1583                .mode           = 0644,
1584                .proc_handler   = proc_dointvec,
1585                .extra1         = &zero,
1586        },
1587        {
1588                .procname       = "min_unmapped_ratio",
1589                .data           = &sysctl_min_unmapped_ratio,
1590                .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1591                .mode           = 0644,
1592                .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1593                .extra1         = &zero,
1594                .extra2         = &one_hundred,
1595        },
1596        {
1597                .procname       = "min_slab_ratio",
1598                .data           = &sysctl_min_slab_ratio,
1599                .maxlen         = sizeof(sysctl_min_slab_ratio),
1600                .mode           = 0644,
1601                .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1602                .extra1         = &zero,
1603                .extra2         = &one_hundred,
1604        },
1605#endif
1606#ifdef CONFIG_SMP
1607        {
1608                .procname       = "stat_interval",
1609                .data           = &sysctl_stat_interval,
1610                .maxlen         = sizeof(sysctl_stat_interval),
1611                .mode           = 0644,
1612                .proc_handler   = proc_dointvec_jiffies,
1613        },
1614        {
1615                .procname       = "stat_refresh",
1616                .data           = NULL,
1617                .maxlen         = 0,
1618                .mode           = 0600,
1619                .proc_handler   = vmstat_refresh,
1620        },
1621#endif
1622#ifdef CONFIG_MMU
1623        {
1624                .procname       = "mmap_min_addr",
1625                .data           = &dac_mmap_min_addr,
1626                .maxlen         = sizeof(unsigned long),
1627                .mode           = 0644,
1628                .proc_handler   = mmap_min_addr_handler,
1629        },
1630#endif
1631#ifdef CONFIG_NUMA
1632        {
1633                .procname       = "numa_zonelist_order",
1634                .data           = &numa_zonelist_order,
1635                .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1636                .mode           = 0644,
1637                .proc_handler   = numa_zonelist_order_handler,
1638        },
1639#endif
1640#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1641   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1642        {
1643                .procname       = "vdso_enabled",
1644#ifdef CONFIG_X86_32
1645                .data           = &vdso32_enabled,
1646                .maxlen         = sizeof(vdso32_enabled),
1647#else
1648                .data           = &vdso_enabled,
1649                .maxlen         = sizeof(vdso_enabled),
1650#endif
1651                .mode           = 0644,
1652                .proc_handler   = proc_dointvec,
1653                .extra1         = &zero,
1654        },
1655#endif
1656#ifdef CONFIG_HIGHMEM
1657        {
1658                .procname       = "highmem_is_dirtyable",
1659                .data           = &vm_highmem_is_dirtyable,
1660                .maxlen         = sizeof(vm_highmem_is_dirtyable),
1661                .mode           = 0644,
1662                .proc_handler   = proc_dointvec_minmax,
1663                .extra1         = &zero,
1664                .extra2         = &one,
1665        },
1666#endif
1667#ifdef CONFIG_MEMORY_FAILURE
1668        {
1669                .procname       = "memory_failure_early_kill",
1670                .data           = &sysctl_memory_failure_early_kill,
1671                .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1672                .mode           = 0644,
1673                .proc_handler   = proc_dointvec_minmax,
1674                .extra1         = &zero,
1675                .extra2         = &one,
1676        },
1677        {
1678                .procname       = "memory_failure_recovery",
1679                .data           = &sysctl_memory_failure_recovery,
1680                .maxlen         = sizeof(sysctl_memory_failure_recovery),
1681                .mode           = 0644,
1682                .proc_handler   = proc_dointvec_minmax,
1683                .extra1         = &zero,
1684                .extra2         = &one,
1685        },
1686#endif
1687        {
1688                .procname       = "user_reserve_kbytes",
1689                .data           = &sysctl_user_reserve_kbytes,
1690                .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1691                .mode           = 0644,
1692                .proc_handler   = proc_doulongvec_minmax,
1693        },
1694        {
1695                .procname       = "admin_reserve_kbytes",
1696                .data           = &sysctl_admin_reserve_kbytes,
1697                .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1698                .mode           = 0644,
1699                .proc_handler   = proc_doulongvec_minmax,
1700        },
1701#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1702        {
1703                .procname       = "mmap_rnd_bits",
1704                .data           = &mmap_rnd_bits,
1705                .maxlen         = sizeof(mmap_rnd_bits),
1706                .mode           = 0600,
1707                .proc_handler   = proc_dointvec_minmax,
1708                .extra1         = (void *)&mmap_rnd_bits_min,
1709                .extra2         = (void *)&mmap_rnd_bits_max,
1710        },
1711#endif
1712#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1713        {
1714                .procname       = "mmap_rnd_compat_bits",
1715                .data           = &mmap_rnd_compat_bits,
1716                .maxlen         = sizeof(mmap_rnd_compat_bits),
1717                .mode           = 0600,
1718                .proc_handler   = proc_dointvec_minmax,
1719                .extra1         = (void *)&mmap_rnd_compat_bits_min,
1720                .extra2         = (void *)&mmap_rnd_compat_bits_max,
1721        },
1722#endif
1723        { }
1724};
1725
1726static struct ctl_table fs_table[] = {
1727        {
1728                .procname       = "inode-nr",
1729                .data           = &inodes_stat,
1730                .maxlen         = 2*sizeof(long),
1731                .mode           = 0444,
1732                .proc_handler   = proc_nr_inodes,
1733        },
1734        {
1735                .procname       = "inode-state",
1736                .data           = &inodes_stat,
1737                .maxlen         = 7*sizeof(long),
1738                .mode           = 0444,
1739                .proc_handler   = proc_nr_inodes,
1740        },
1741        {
1742                .procname       = "file-nr",
1743                .data           = &files_stat,
1744                .maxlen         = sizeof(files_stat),
1745                .mode           = 0444,
1746                .proc_handler   = proc_nr_files,
1747        },
1748        {
1749                .procname       = "file-max",
1750                .data           = &files_stat.max_files,
1751                .maxlen         = sizeof(files_stat.max_files),
1752                .mode           = 0644,
1753                .proc_handler   = proc_doulongvec_minmax,
1754                .extra1         = &zero_ul,
1755                .extra2         = &long_max,
1756        },
1757        {
1758                .procname       = "nr_open",
1759                .data           = &sysctl_nr_open,
1760                .maxlen         = sizeof(unsigned int),
1761                .mode           = 0644,
1762                .proc_handler   = proc_dointvec_minmax,
1763                .extra1         = &sysctl_nr_open_min,
1764                .extra2         = &sysctl_nr_open_max,
1765        },
1766        {
1767                .procname       = "dentry-state",
1768                .data           = &dentry_stat,
1769                .maxlen         = 6*sizeof(long),
1770                .mode           = 0444,
1771                .proc_handler   = proc_nr_dentry,
1772        },
1773        {
1774                .procname       = "overflowuid",
1775                .data           = &fs_overflowuid,
1776                .maxlen         = sizeof(int),
1777                .mode           = 0644,
1778                .proc_handler   = proc_dointvec_minmax,
1779                .extra1         = &minolduid,
1780                .extra2         = &maxolduid,
1781        },
1782        {
1783                .procname       = "overflowgid",
1784                .data           = &fs_overflowgid,
1785                .maxlen         = sizeof(int),
1786                .mode           = 0644,
1787                .proc_handler   = proc_dointvec_minmax,
1788                .extra1         = &minolduid,
1789                .extra2         = &maxolduid,
1790        },
1791#ifdef CONFIG_FILE_LOCKING
1792        {
1793                .procname       = "leases-enable",
1794                .data           = &leases_enable,
1795                .maxlen         = sizeof(int),
1796                .mode           = 0644,
1797                .proc_handler   = proc_dointvec,
1798        },
1799#endif
1800#ifdef CONFIG_DNOTIFY
1801        {
1802                .procname       = "dir-notify-enable",
1803                .data           = &dir_notify_enable,
1804                .maxlen         = sizeof(int),
1805                .mode           = 0644,
1806                .proc_handler   = proc_dointvec,
1807        },
1808#endif
1809#ifdef CONFIG_MMU
1810#ifdef CONFIG_FILE_LOCKING
1811        {
1812                .procname       = "lease-break-time",
1813                .data           = &lease_break_time,
1814                .maxlen         = sizeof(int),
1815                .mode           = 0644,
1816                .proc_handler   = proc_dointvec,
1817        },
1818#endif
1819#ifdef CONFIG_AIO
1820        {
1821                .procname       = "aio-nr",
1822                .data           = &aio_nr,
1823                .maxlen         = sizeof(aio_nr),
1824                .mode           = 0444,
1825                .proc_handler   = proc_doulongvec_minmax,
1826        },
1827        {
1828                .procname       = "aio-max-nr",
1829                .data           = &aio_max_nr,
1830                .maxlen         = sizeof(aio_max_nr),
1831                .mode           = 0644,
1832                .proc_handler   = proc_doulongvec_minmax,
1833        },
1834#endif /* CONFIG_AIO */
1835#ifdef CONFIG_INOTIFY_USER
1836        {
1837                .procname       = "inotify",
1838                .mode           = 0555,
1839                .child          = inotify_table,
1840        },
1841#endif  
1842#ifdef CONFIG_EPOLL
1843        {
1844                .procname       = "epoll",
1845                .mode           = 0555,
1846                .child          = epoll_table,
1847        },
1848#endif
1849#endif
1850        {
1851                .procname       = "protected_symlinks",
1852                .data           = &sysctl_protected_symlinks,
1853                .maxlen         = sizeof(int),
1854                .mode           = 0600,
1855                .proc_handler   = proc_dointvec_minmax,
1856                .extra1         = &zero,
1857                .extra2         = &one,
1858        },
1859        {
1860                .procname       = "protected_hardlinks",
1861                .data           = &sysctl_protected_hardlinks,
1862                .maxlen         = sizeof(int),
1863                .mode           = 0600,
1864                .proc_handler   = proc_dointvec_minmax,
1865                .extra1         = &zero,
1866                .extra2         = &one,
1867        },
1868        {
1869                .procname       = "protected_fifos",
1870                .data           = &sysctl_protected_fifos,
1871                .maxlen         = sizeof(int),
1872                .mode           = 0600,
1873                .proc_handler   = proc_dointvec_minmax,
1874                .extra1         = &zero,
1875                .extra2         = &two,
1876        },
1877        {
1878                .procname       = "protected_regular",
1879                .data           = &sysctl_protected_regular,
1880                .maxlen         = sizeof(int),
1881                .mode           = 0600,
1882                .proc_handler   = proc_dointvec_minmax,
1883                .extra1         = &zero,
1884                .extra2         = &two,
1885        },
1886        {
1887                .procname       = "suid_dumpable",
1888                .data           = &suid_dumpable,
1889                .maxlen         = sizeof(int),
1890                .mode           = 0644,
1891                .proc_handler   = proc_dointvec_minmax_coredump,
1892                .extra1         = &zero,
1893                .extra2         = &two,
1894        },
1895#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1896        {
1897                .procname       = "binfmt_misc",
1898                .mode           = 0555,
1899                .child          = sysctl_mount_point,
1900        },
1901#endif
1902        {
1903                .procname       = "pipe-max-size",
1904                .data           = &pipe_max_size,
1905                .maxlen         = sizeof(pipe_max_size),
1906                .mode           = 0644,
1907                .proc_handler   = proc_dopipe_max_size,
1908        },
1909        {
1910                .procname       = "pipe-user-pages-hard",
1911                .data           = &pipe_user_pages_hard,
1912                .maxlen         = sizeof(pipe_user_pages_hard),
1913                .mode           = 0644,
1914                .proc_handler   = proc_doulongvec_minmax,
1915        },
1916        {
1917                .procname       = "pipe-user-pages-soft",
1918                .data           = &pipe_user_pages_soft,
1919                .maxlen         = sizeof(pipe_user_pages_soft),
1920                .mode           = 0644,
1921                .proc_handler   = proc_doulongvec_minmax,
1922        },
1923        {
1924                .procname       = "mount-max",
1925                .data           = &sysctl_mount_max,
1926                .maxlen         = sizeof(unsigned int),
1927                .mode           = 0644,
1928                .proc_handler   = proc_dointvec_minmax,
1929                .extra1         = &one,
1930        },
1931        { }
1932};
1933
1934static struct ctl_table debug_table[] = {
1935#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1936        {
1937                .procname       = "exception-trace",
1938                .data           = &show_unhandled_signals,
1939                .maxlen         = sizeof(int),
1940                .mode           = 0644,
1941                .proc_handler   = proc_dointvec
1942        },
1943#endif
1944#if defined(CONFIG_OPTPROBES)
1945        {
1946                .procname       = "kprobes-optimization",
1947                .data           = &sysctl_kprobes_optimization,
1948                .maxlen         = sizeof(int),
1949                .mode           = 0644,
1950                .proc_handler   = proc_kprobes_optimization_handler,
1951                .extra1         = &zero,
1952                .extra2         = &one,
1953        },
1954#endif
1955        { }
1956};
1957
1958static struct ctl_table dev_table[] = {
1959        { }
1960};
1961
1962int __init sysctl_init(void)
1963{
1964        struct ctl_table_header *hdr;
1965
1966        hdr = register_sysctl_table(sysctl_base_table);
1967        kmemleak_not_leak(hdr);
1968        return 0;
1969}
1970
1971#endif /* CONFIG_SYSCTL */
1972
1973/*
1974 * /proc/sys support
1975 */
1976
1977#ifdef CONFIG_PROC_SYSCTL
1978
1979static int _proc_do_string(char *data, int maxlen, int write,
1980                           char __user *buffer,
1981                           size_t *lenp, loff_t *ppos)
1982{
1983        size_t len;
1984        char __user *p;
1985        char c;
1986
1987        if (!data || !maxlen || !*lenp) {
1988                *lenp = 0;
1989                return 0;
1990        }
1991
1992        if (write) {
1993                if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1994                        /* Only continue writes not past the end of buffer. */
1995                        len = strlen(data);
1996                        if (len > maxlen - 1)
1997                                len = maxlen - 1;
1998
1999                        if (*ppos > len)
2000                                return 0;
2001                        len = *ppos;
2002                } else {
2003                        /* Start writing from beginning of buffer. */
2004                        len = 0;
2005                }
2006
2007                *ppos += *lenp;
2008                p = buffer;
2009                while ((p - buffer) < *lenp && len < maxlen - 1) {
2010                        if (get_user(c, p++))
2011                                return -EFAULT;
2012                        if (c == 0 || c == '\n')
2013                                break;
2014                        data[len++] = c;
2015                }
2016                data[len] = 0;
2017        } else {
2018                len = strlen(data);
2019                if (len > maxlen)
2020                        len = maxlen;
2021
2022                if (*ppos > len) {
2023                        *lenp = 0;
2024                        return 0;
2025                }
2026
2027                data += *ppos;
2028                len  -= *ppos;
2029
2030                if (len > *lenp)
2031                        len = *lenp;
2032                if (len)
2033                        if (copy_to_user(buffer, data, len))
2034                                return -EFAULT;
2035                if (len < *lenp) {
2036                        if (put_user('\n', buffer + len))
2037                                return -EFAULT;
2038                        len++;
2039                }
2040                *lenp = len;
2041                *ppos += len;
2042        }
2043        return 0;
2044}
2045
2046static void warn_sysctl_write(struct ctl_table *table)
2047{
2048        pr_warn_once("%s wrote to %s when file position was not 0!\n"
2049                "This will not be supported in the future. To silence this\n"
2050                "warning, set kernel.sysctl_writes_strict = -1\n",
2051                current->comm, table->procname);
2052}
2053
2054/**
2055 * proc_first_pos_non_zero_ignore - check if first position is allowed
2056 * @ppos: file position
2057 * @table: the sysctl table
2058 *
2059 * Returns true if the first position is non-zero and the sysctl_writes_strict
2060 * mode indicates this is not allowed for numeric input types. String proc
2061 * handlers can ignore the return value.
2062 */
2063static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2064                                           struct ctl_table *table)
2065{
2066        if (!*ppos)
2067                return false;
2068
2069        switch (sysctl_writes_strict) {
2070        case SYSCTL_WRITES_STRICT:
2071                return true;
2072        case SYSCTL_WRITES_WARN:
2073                warn_sysctl_write(table);
2074                return false;
2075        default:
2076                return false;
2077        }
2078}
2079
2080/**
2081 * proc_dostring - read a string sysctl
2082 * @table: the sysctl table
2083 * @write: %TRUE if this is a write to the sysctl file
2084 * @buffer: the user buffer
2085 * @lenp: the size of the user buffer
2086 * @ppos: file position
2087 *
2088 * Reads/writes a string from/to the user buffer. If the kernel
2089 * buffer provided is not large enough to hold the string, the
2090 * string is truncated. The copied string is %NULL-terminated.
2091 * If the string is being read by the user process, it is copied
2092 * and a newline '\n' is added. It is truncated if the buffer is
2093 * not large enough.
2094 *
2095 * Returns 0 on success.
2096 */
2097int proc_dostring(struct ctl_table *table, int write,
2098                  void __user *buffer, size_t *lenp, loff_t *ppos)
2099{
2100        if (write)
2101                proc_first_pos_non_zero_ignore(ppos, table);
2102
2103        return _proc_do_string((char *)(table->data), table->maxlen, write,
2104                               (char __user *)buffer, lenp, ppos);
2105}
2106
/*
 * Advance *@buf past whatever skip_spaces() skips and return the number of
 * characters that were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
        char *start = *buf;

        *buf = skip_spaces(start);
        return *buf - start;
}
2115
/*
 * Step *@buf over a leading run of the character @v, decrementing *@size
 * for each character skipped.  Stops at the first different character or
 * when *@size reaches zero, whichever comes first.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        while (*size && **buf == v) {
                (*buf)++;
                (*size)--;
        }
}
2125
2126/**
2127 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
2128 *                   fail on overflow
2129 *
2130 * @cp: kernel buffer containing the string to parse
2131 * @endp: pointer to store the trailing characters
2132 * @base: the base to use
2133 * @res: where the parsed integer will be stored
2134 *
2135 * In case of success 0 is returned and @res will contain the parsed integer,
2136 * @endp will hold any trailing characters.
2137 * This function will fail the parse on overflow. If there wasn't an overflow
2138 * the function will defer the decision what characters count as invalid to the
2139 * caller.
2140 */
2141static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
2142                           unsigned long *res)
2143{
2144        unsigned long long result;
2145        unsigned int rv;
2146
2147        cp = _parse_integer_fixup_radix(cp, &base);
2148        rv = _parse_integer(cp, base, &result);
2149        if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
2150                return -ERANGE;
2151
2152        cp += rv;
2153
2154        if (endp)
2155                *endp = (char *)cp;
2156
2157        *res = (unsigned long)result;
2158        return 0;
2159}
2160
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a kernel buffer
 *
 * @buf: pointer to the current position in a kernel buffer
 * @size: number of bytes left in the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes are examined.  A leading '-' is accepted
 * only when more input follows it, and the first character after the
 * optional sign must be a digit.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if the input is empty, does not start with a number,
 * overflows, fills the whole scratch buffer, or is followed by a character
 * that is not in @perm_tr (when @perm_tr_len is non-zero).
 */
static int proc_get_long(char **buf, size_t *size,
                          unsigned long *val, bool *neg,
                          const char *perm_tr, unsigned perm_tr_len, char *tr)
{
        size_t len;
        char *p, tmp[TMPBUFLEN];

        if (!*size)
                return -EINVAL;

        /*
         * Keep the length in a size_t.  Squeezing *size into an int would
         * turn sizes above INT_MAX negative, which slips past the clamp
         * below and reaches memcpy() as an enormous length.
         */
        len = *size;
        if (len > TMPBUFLEN - 1)
                len = TMPBUFLEN - 1;

        memcpy(tmp, *buf, len);

        tmp[len] = 0;
        p = tmp;
        if (*p == '-' && *size > 1) {
                *neg = true;
                p++;
        } else
                *neg = false;
        if (!isdigit(*p))
                return -EINVAL;

        if (strtoul_lenient(p, &p, 0, val))
                return -EINVAL;

        /* Bytes consumed, including the sign; p never moves backwards. */
        len = p - tmp;

        /* We don't know if the next char is whitespace thus we may accept
         * invalid integers (e.g. 1234...a) or two integers instead of one
         * (e.g. 123...1). So lets not allow such large numbers. */
        if (len == TMPBUFLEN - 1)
                return -EINVAL;

        if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
                return -EINVAL;

        if (tr && (len < *size))
                *tr = *p;

        *buf += len;
        *size -= len;

        return 0;
}
2226
2227/**
2228 * proc_put_long - converts an integer to a decimal ASCII formatted string
2229 *
2230 * @buf: the user buffer
2231 * @size: the size of the user buffer
2232 * @val: the integer to be converted
2233 * @neg: sign of the number, %TRUE for negative
2234 *
2235 * In case of success %0 is returned and @buf and @size are updated with
2236 * the amount of bytes written.
2237 */
2238static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2239                          bool neg)
2240{
2241        int len;
2242        char tmp[TMPBUFLEN], *p = tmp;
2243
2244        sprintf(p, "%s%lu", neg ? "-" : "", val);
2245        len = strlen(tmp);
2246        if (len > *size)
2247                len = *size;
2248        if (copy_to_user(*buf, tmp, len))
2249                return -EFAULT;
2250        *size -= len;
2251        *buf += len;
2252        return 0;
2253}
2254#undef TMPBUFLEN
2255
2256static int proc_put_char(void __user **buf, size_t *size, char c)
2257{
2258        if (*size) {
2259                char __user **buffer = (char __user **)buf;
2260                if (put_user(c, *buffer))
2261                        return -EFAULT;
2262                (*size)--, (*buffer)++;
2263                *buf = *buffer;
2264        }
2265        return 0;
2266}
2267
/*
 * Default conversion between the (sign, magnitude) pair used by the parser
 * and an int sysctl value.
 *
 * Write: stores the signed value described by *@negp and *@lvalp in *@valp.
 * Magnitudes above INT_MAX (or INT_MAX + 1 for negative values) are
 * rejected with -EINVAL and *@valp is left alone.
 * Read: splits *@valp into *@negp and *@lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (!write) {
                const int cur = *valp;
                const bool is_neg = cur < 0;

                *negp = is_neg;
                /* Negate in unsigned arithmetic so INT_MIN is handled. */
                *lvalp = is_neg ? -(unsigned long)cur : (unsigned long)cur;
                return 0;
        }

        if (*negp) {
                /* One extra magnitude is available for INT_MIN. */
                if (*lvalp > (unsigned long) INT_MAX + 1)
                        return -EINVAL;
                *valp = -*lvalp;
                return 0;
        }

        if (*lvalp > (unsigned long) INT_MAX)
                return -EINVAL;
        *valp = *lvalp;
        return 0;
}
2294
/*
 * Default conversion between an unsigned long and an unsigned int sysctl
 * value.
 *
 * Write: stores *@lvalp in *@valp, or returns -EINVAL (leaving *@valp
 * alone) when the value exceeds UINT_MAX.
 * Read: widens *@valp into *@lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
                                  unsigned int *valp,
                                  int write, void *data)
{
        if (!write) {
                *lvalp = (unsigned long)*valp;
                return 0;
        }

        if (*lvalp > UINT_MAX)
                return -EINVAL;

        *valp = *lvalp;
        return 0;
}
2309
/*
 * Characters allowed to follow a parsed number.  Deliberately a plain
 * array rather than a string literal: sizeof() must count exactly these
 * three characters, with no trailing NUL.
 */
static const char proc_wspace_sep[] = {
        ' ',
        '\t',
        '\n',
};
2311
2312static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2313                  int write, void __user *buffer,
2314                  size_t *lenp, loff_t *ppos,
2315                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2316                              int write, void *data),
2317                  void *data)
2318{
2319        int *i, vleft, first = 1, err = 0;
2320        size_t left;
2321        char *kbuf = NULL, *p;
2322        
2323        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2324                *lenp = 0;
2325                return 0;
2326        }
2327        
2328        i = (int *) tbl_data;
2329        vleft = table->maxlen / sizeof(*i);
2330        left = *lenp;
2331
2332        if (!conv)
2333                conv = do_proc_dointvec_conv;
2334
2335        if (write) {
2336                if (proc_first_pos_non_zero_ignore(ppos, table))
2337                        goto out;
2338
2339                if (left > PAGE_SIZE - 1)
2340                        left = PAGE_SIZE - 1;
2341                p = kbuf = memdup_user_nul(buffer, left);
2342                if (IS_ERR(kbuf))
2343                        return PTR_ERR(kbuf);
2344        }
2345
2346        for (; left && vleft--; i++, first=0) {
2347                unsigned long lval;
2348                bool neg;
2349
2350                if (write) {
2351                        left -= proc_skip_spaces(&p);
2352
2353                        if (!left)
2354                                break;
2355                        err = proc_get_long(&p, &left, &lval, &neg,
2356                                             proc_wspace_sep,
2357                                             sizeof(proc_wspace_sep), NULL);
2358                        if (err)
2359                                break;
2360                        if (conv(&neg, &lval, i, 1, data)) {
2361                                err = -EINVAL;
2362                                break;
2363                        }
2364                } else {
2365                        if (conv(&neg, &lval, i, 0, data)) {
2366                                err = -EINVAL;
2367                                break;
2368                        }
2369                        if (!first)
2370                                err = proc_put_char(&buffer, &left, '\t');
2371                        if (err)
2372                                break;
2373                        err = proc_put_long(&buffer, &left, lval, neg);
2374                        if (err)
2375                                break;
2376                }
2377        }
2378
2379        if (!write && !first && left && !err)
2380                err = proc_put_char(&buffer, &left, '\n');
2381        if (write && !err && left)
2382                left -= proc_skip_spaces(&p);
2383        if (write) {
2384                kfree(kbuf);
2385                if (first)
2386                        return err ? : -EINVAL;
2387        }
2388        *lenp -= left;
2389out:
2390        *ppos += *lenp;
2391        return err;
2392}
2393
2394static int do_proc_dointvec(struct ctl_table *table, int write,
2395                  void __user *buffer, size_t *lenp, loff_t *ppos,
2396                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2397                              int write, void *data),
2398                  void *data)
2399{
2400        return __do_proc_dointvec(table->data, table, write,
2401                        buffer, lenp, ppos, conv, data);
2402}
2403
2404static int do_proc_douintvec_w(unsigned int *tbl_data,
2405                               struct ctl_table *table,
2406                               void __user *buffer,
2407                               size_t *lenp, loff_t *ppos,
2408                               int (*conv)(unsigned long *lvalp,
2409                                           unsigned int *valp,
2410                                           int write, void *data),
2411                               void *data)
2412{
2413        unsigned long lval;
2414        int err = 0;
2415        size_t left;
2416        bool neg;
2417        char *kbuf = NULL, *p;
2418
2419        left = *lenp;
2420
2421        if (proc_first_pos_non_zero_ignore(ppos, table))
2422                goto bail_early;
2423
2424        if (left > PAGE_SIZE - 1)
2425                left = PAGE_SIZE - 1;
2426
2427        p = kbuf = memdup_user_nul(buffer, left);
2428        if (IS_ERR(kbuf))
2429                return -EINVAL;
2430
2431        left -= proc_skip_spaces(&p);
2432        if (!left) {
2433                err = -EINVAL;
2434                goto out_free;
2435        }
2436
2437        err = proc_get_long(&p, &left, &lval, &neg,
2438                             proc_wspace_sep,
2439                             sizeof(proc_wspace_sep), NULL);
2440        if (err || neg) {
2441                err = -EINVAL;
2442                goto out_free;
2443        }
2444
2445        if (conv(&lval, tbl_data, 1, data)) {
2446                err = -EINVAL;
2447                goto out_free;
2448        }
2449
2450        if (!err && left)
2451                left -= proc_skip_spaces(&p);
2452
2453out_free:
2454        kfree(kbuf);
2455        if (err)
2456                return -EINVAL;
2457
2458        return 0;
2459
2460        /* This is in keeping with old __do_proc_dointvec() */
2461bail_early:
2462        *ppos += *lenp;
2463        return err;
2464}
2465
2466static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2467                               size_t *lenp, loff_t *ppos,
2468                               int (*conv)(unsigned long *lvalp,
2469                                           unsigned int *valp,
2470                                           int write, void *data),
2471                               void *data)
2472{
2473        unsigned long lval;
2474        int err = 0;
2475        size_t left;
2476
2477        left = *lenp;
2478
2479        if (conv(&lval, tbl_data, 0, data)) {
2480                err = -EINVAL;
2481                goto out;
2482        }
2483
2484        err = proc_put_long(&buffer, &left, lval, false);
2485        if (err || !left)
2486                goto out;
2487
2488        err = proc_put_char(&buffer, &left, '\n');
2489
2490out:
2491        *lenp -= left;
2492        *ppos += *lenp;
2493
2494        return err;
2495}
2496
2497static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2498                               int write, void __user *buffer,
2499                               size_t *lenp, loff_t *ppos,
2500                               int (*conv)(unsigned long *lvalp,
2501                                           unsigned int *valp,
2502                                           int write, void *data),
2503                               void *data)
2504{
2505        unsigned int *i, vleft;
2506
2507        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2508                *lenp = 0;
2509                return 0;
2510        }
2511
2512        i = (unsigned int *) tbl_data;
2513        vleft = table->maxlen / sizeof(*i);
2514
2515        /*
2516         * Arrays are not supported, keep this simple. *Do not* add
2517         * support for them.
2518         */
2519        if (vleft != 1) {
2520                *lenp = 0;
2521                return -EINVAL;
2522        }
2523
2524        if (!conv)
2525                conv = do_proc_douintvec_conv;
2526
2527        if (write)
2528                return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2529                                           conv, data);
2530        return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2531}
2532
2533static int do_proc_douintvec(struct ctl_table *table, int write,
2534                             void __user *buffer, size_t *lenp, loff_t *ppos,
2535                             int (*conv)(unsigned long *lvalp,
2536                                         unsigned int *valp,
2537                                         int write, void *data),
2538                             void *data)
2539{
2540        return __do_proc_douintvec(table->data, table, write,
2541                                   buffer, lenp, ppos, conv, data);
2542}
2543
2544/**
2545 * proc_dointvec - read a vector of integers
2546 * @table: the sysctl table
2547 * @write: %TRUE if this is a write to the sysctl file
2548 * @buffer: the user buffer
2549 * @lenp: the size of the user buffer
2550 * @ppos: file position
2551 *
2552 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2553 * values from/to the user buffer, treated as an ASCII string. 
2554 *
2555 * Returns 0 on success.
2556 */
2557int proc_dointvec(struct ctl_table *table, int write,
2558                     void __user *buffer, size_t *lenp, loff_t *ppos)
2559{
2560        return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2561}
2562
2563/**
2564 * proc_douintvec - read a vector of unsigned integers
2565 * @table: the sysctl table
2566 * @write: %TRUE if this is a write to the sysctl file
2567 * @buffer: the user buffer
2568 * @lenp: the size of the user buffer
2569 * @ppos: file position
2570 *
2571 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2572 * values from/to the user buffer, treated as an ASCII string.
2573 *
2574 * Returns 0 on success.
2575 */
2576int proc_douintvec(struct ctl_table *table, int write,
2577                     void __user *buffer, size_t *lenp, loff_t *ppos)
2578{
2579        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2580                                 do_proc_douintvec_conv, NULL);
2581}
2582
2583/*
2584 * Taint values can only be increased
2585 * This means we can safely use a temporary.
2586 */
2587static int proc_taint(struct ctl_table *table, int write,
2588                               void __user *buffer, size_t *lenp, loff_t *ppos)
2589{
2590        struct ctl_table t;
2591        unsigned long tmptaint = get_taint();
2592        int err;
2593
2594        if (write && !capable(CAP_SYS_ADMIN))
2595                return -EPERM;
2596
2597        t = *table;
2598        t.data = &tmptaint;
2599        err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2600        if (err < 0)
2601                return err;
2602
2603        if (write) {
2604                /*
2605                 * Poor man's atomic or. Not worth adding a primitive
2606                 * to everyone's atomic.h for this
2607                 */
2608                int i;
2609                for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2610                        if ((tmptaint >> i) & 1)
2611                                add_taint(i, LOCKDEP_STILL_OK);
2612                }
2613        }
2614
2615        return err;
2616}
2617
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() variant whose writes are reserved for callers
 * holding CAP_SYS_ADMIN; reads are not restricted here.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
{
        if (!write || capable(CAP_SYS_ADMIN))
                return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

        return -EPERM;
}
#endif
2628
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: points at the smallest value a write may store; NULL for no lower bound
 * @max: points at the largest value a write may store; NULL for no upper bound
 *
 * Carries the bounds that do_proc_dointvec_minmax_conv() enforces for
 * sysctl parameters handled by proc_dointvec_minmax().
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min, *max;
};
2642
2643static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2644                                        int *valp,
2645                                        int write, void *data)
2646{
2647        int tmp, ret;
2648        struct do_proc_dointvec_minmax_conv_param *param = data;
2649        /*
2650         * If writing, first do so via a temporary local int so we can
2651         * bounds-check it before touching *valp.
2652         */
2653        int *ip = write ? &tmp : valp;
2654
2655        ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
2656        if (ret)
2657                return ret;
2658
2659        if (write) {
2660                if ((param->min && *param->min > tmp) ||
2661                    (param->max && *param->max < tmp))
2662                        return -EINVAL;
2663                *valp = tmp;
2664        }
2665
2666        return 0;
2667}
2668
2669/**
2670 * proc_dointvec_minmax - read a vector of integers with min/max values
2671 * @table: the sysctl table
2672 * @write: %TRUE if this is a write to the sysctl file
2673 * @buffer: the user buffer
2674 * @lenp: the size of the user buffer
2675 * @ppos: file position
2676 *
2677 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2678 * values from/to the user buffer, treated as an ASCII string.
2679 *
2680 * This routine will ensure the values are within the range specified by
2681 * table->extra1 (min) and table->extra2 (max).
2682 *
2683 * Returns 0 on success or -EINVAL on write when the range check fails.
2684 */
2685int proc_dointvec_minmax(struct ctl_table *table, int write,
2686                  void __user *buffer, size_t *lenp, loff_t *ppos)
2687{
2688        struct do_proc_dointvec_minmax_conv_param param = {
2689                .min = (int *) table->extra1,
2690                .max = (int *) table->extra2,
2691        };
2692        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2693                                do_proc_dointvec_minmax_conv, &param);
2694}
2695
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: points at the smallest value a write may store; NULL for no lower bound
 * @max: points at the largest value a write may store; NULL for no upper bound
 *
 * Carries the bounds that do_proc_douintvec_minmax_conv() enforces for
 * sysctl parameters handled by proc_douintvec_minmax().
 */
struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min, *max;
};
2709
2710static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
2711                                         unsigned int *valp,
2712                                         int write, void *data)
2713{
2714        int ret;
2715        unsigned int tmp;
2716        struct do_proc_douintvec_minmax_conv_param *param = data;
2717        /* write via temporary local uint for bounds-checking */
2718        unsigned int *up = write ? &tmp : valp;
2719
2720        ret = do_proc_douintvec_conv(lvalp, up, write, data);
2721        if (ret)
2722                return ret;
2723
2724        if (write) {
2725                if ((param->min && *param->min > tmp) ||
2726                    (param->max && *param->max < tmp))
2727                        return -ERANGE;
2728
2729                *valp = tmp;
2730        }
2731
2732        return 0;
2733}
2734
2735/**
2736 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2737 * @table: the sysctl table
2738 * @write: %TRUE if this is a write to the sysctl file
2739 * @buffer: the user buffer
2740 * @lenp: the size of the user buffer
2741 * @ppos: file position
2742 *
2743 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2744 * values from/to the user buffer, treated as an ASCII string. Negative
2745 * strings are not allowed.
2746 *
2747 * This routine will ensure the values are within the range specified by
2748 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2749 * check for UINT_MAX to avoid having to support wrap around uses from
2750 * userspace.
2751 *
2752 * Returns 0 on success or -ERANGE on write when the range check fails.
2753 */
2754int proc_douintvec_minmax(struct ctl_table *table, int write,
2755                          void __user *buffer, size_t *lenp, loff_t *ppos)
2756{
2757        struct do_proc_douintvec_minmax_conv_param param = {
2758                .min = (unsigned int *) table->extra1,
2759                .max = (unsigned int *) table->extra2,
2760        };
2761        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2762                                 do_proc_douintvec_minmax_conv, &param);
2763}
2764
/*
 * Conversion callback for proc_dopipe_max_size().
 *
 * On write the requested size is passed through round_pipe_size(); a result
 * of 0 is refused with -EINVAL, anything else is stored in *@valp.  On read
 * *@valp is simply widened into *@lvalp.
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
                                        unsigned int *valp,
                                        int write, void *data)
{
        unsigned int rounded;

        if (!write) {
                *lvalp = (unsigned long) *valp;
                return 0;
        }

        rounded = round_pipe_size(*lvalp);
        if (rounded == 0)
                return -EINVAL;

        *valp = rounded;
        return 0;
}
2784
2785static int proc_dopipe_max_size(struct ctl_table *table, int write,
2786                                void __user *buffer, size_t *lenp, loff_t *ppos)
2787{
2788        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2789                                 do_proc_dopipe_max_size_conv, NULL);
2790}
2791
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern is neither an
 * absolute path nor a pipe handler.  Compiles to nothing without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
        );
#endif
}
2805
2806static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2807                void __user *buffer, size_t *lenp, loff_t *ppos)
2808{
2809        int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2810        if (!error)
2811                validate_coredump_safety();
2812        return error;
2813}
2814
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, when it succeeds, by a re-check of the core
 * dump settings via validate_coredump_safety().
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int error;

        error = proc_dostring(table, write, buffer, lenp, ppos);
        if (error)
                return error;

        validate_coredump_safety();
        return 0;
}
#endif
2825
2826static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2827                                     void __user *buffer,
2828                                     size_t *lenp, loff_t *ppos,
2829                                     unsigned long convmul,
2830                                     unsigned long convdiv)
2831{
2832        unsigned long *i, *min, *max;
2833        int vleft, first = 1, err = 0;
2834        size_t left;
2835        char *kbuf = NULL, *p;
2836
2837        if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2838                *lenp = 0;
2839                return 0;
2840        }
2841
2842        i = (unsigned long *) data;
2843        min = (unsigned long *) table->extra1;
2844        max = (unsigned long *) table->extra2;
2845        vleft = table->maxlen / sizeof(unsigned long);
2846        left = *lenp;
2847
2848        if (write) {
2849                if (proc_first_pos_non_zero_ignore(ppos, table))
2850                        goto out;
2851
2852                if (left > PAGE_SIZE - 1)
2853                        left = PAGE_SIZE - 1;
2854                p = kbuf = memdup_user_nul(buffer, left);
2855                if (IS_ERR(kbuf))
2856                        return PTR_ERR(kbuf);
2857        }
2858
2859        for (; left && vleft--; i++, first = 0) {
2860                unsigned long val;
2861
2862                if (write) {
2863                        bool neg;
2864
2865                        left -= proc_skip_spaces(&p);
2866                        if (!left)
2867                                break;
2868
2869                        err = proc_get_long(&p, &left, &val, &neg,
2870                                             proc_wspace_sep,
2871                                             sizeof(proc_wspace_sep), NULL);
2872                        if (err)
2873                                break;
2874                        if (neg)
2875                                continue;
2876                        val = convmul * val / convdiv;
2877                        if ((min && val < *min) || (max && val > *max))
2878                                continue;
2879                        *i = val;
2880                } else {
2881                        val = convdiv * (*i) / convmul;
2882                        if (!first) {
2883                                err = proc_put_char(&buffer, &left, '\t');
2884                                if (err)
2885                                        break;
2886                        }
2887                        err = proc_put_long(&buffer, &left, val, false);
2888                        if (err)
2889                                break;
2890                }
2891        }
2892
2893        if (!write && !first && left && !err)
2894                err = proc_put_char(&buffer, &left, '\n');
2895        if (write && !err)
2896                left -= proc_skip_spaces(&p);
2897        if (write) {
2898                kfree(kbuf);
2899                if (first)
2900                        return err ? : -EINVAL;
2901        }
2902        *lenp -= left;
2903out:
2904        *ppos += *lenp;
2905        return err;
2906}
2907
2908static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2909                                     void __user *buffer,
2910                                     size_t *lenp, loff_t *ppos,
2911                                     unsigned long convmul,
2912                                     unsigned long convdiv)
2913{
2914        return __do_proc_doulongvec_minmax(table->data, table, write,
2915                        buffer, lenp, ppos, convmul, convdiv);
2916}
2917
2918/**
2919 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2920 * @table: the sysctl table
2921 * @write: %TRUE if this is a write to the sysctl file
2922 * @buffer: the user buffer
2923 * @lenp: the size of the user buffer
2924 * @ppos: file position
2925 *
2926 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2927 * values from/to the user buffer, treated as an ASCII string.
2928 *
2929 * This routine will ensure the values are within the range specified by
2930 * table->extra1 (min) and table->extra2 (max).
2931 *
2932 * Returns 0 on success.
2933 */
2934int proc_doulongvec_minmax(struct ctl_table *table, int write,
2935                           void __user *buffer, size_t *lenp, loff_t *ppos)
2936{
2937    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2938}
2939
2940/**
2941 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2942 * @table: the sysctl table
2943 * @write: %TRUE if this is a write to the sysctl file
2944 * @buffer: the user buffer
2945 * @lenp: the size of the user buffer
2946 * @ppos: file position
2947 *
2948 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2949 * values from/to the user buffer, treated as an ASCII string. The values
2950 * are treated as milliseconds, and converted to jiffies when they are stored.
2951 *
2952 * This routine will ensure the values are within the range specified by
2953 * table->extra1 (min) and table->extra2 (max).
2954 *
2955 * Returns 0 on success.
2956 */
2957int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2958                                      void __user *buffer,
2959                                      size_t *lenp, loff_t *ppos)
2960{
2961    return do_proc_doulongvec_minmax(table, write, buffer,
2962                                     lenp, ppos, HZ, 1000l);
2963}
2964
2965
2966static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2967                                         int *valp,
2968                                         int write, void *data)
2969{
2970        if (write) {
2971                if (*lvalp > INT_MAX / HZ)
2972                        return 1;
2973                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2974        } else {
2975                int val = *valp;
2976                unsigned long lval;
2977                if (val < 0) {
2978                        *negp = true;
2979                        lval = -(unsigned long)val;
2980                } else {
2981                        *negp = false;
2982                        lval = (unsigned long)val;
2983                }
2984                *lvalp = lval / HZ;
2985        }
2986        return 0;
2987}
2988
2989static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2990                                                int *valp,
2991                                                int write, void *data)
2992{
2993        if (write) {
2994                if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2995                        return 1;
2996                *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2997        } else {
2998                int val = *valp;
2999                unsigned long lval;
3000                if (val < 0) {
3001                        *negp = true;
3002                        lval = -(unsigned long)val;
3003                } else {
3004                        *negp = false;
3005                        lval = (unsigned long)val;
3006                }
3007                *lvalp = jiffies_to_clock_t(lval);
3008        }
3009        return 0;
3010}
3011
/*
 * Conversion callback: milliseconds in userspace <-> jiffies in *@valp.
 *
 * On write the signed millisecond count goes through msecs_to_jiffies(); a
 * result above INT_MAX is refused by returning 1.  On read the sign is
 * reported through *@negp and the magnitude goes through jiffies_to_msecs().
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                unsigned long ticks;

                ticks = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
                if (ticks > INT_MAX)
                        return 1;

                *valp = (int)ticks;
        } else {
                int stored = *valp;
                bool negative = stored < 0;
                unsigned long magnitude;

                magnitude = negative ? -(unsigned long)stored
                                     : (unsigned long)stored;
                *negp = negative;
                *lvalp = jiffies_to_msecs(magnitude);
        }

        return 0;
}
3036
3037/**
3038 * proc_dointvec_jiffies - read a vector of integers as seconds
3039 * @table: the sysctl table
3040 * @write: %TRUE if this is a write to the sysctl file
3041 * @buffer: the user buffer
3042 * @lenp: the size of the user buffer
3043 * @ppos: file position
3044 *
3045 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3046 * values from/to the user buffer, treated as an ASCII string. 
3047 * The values read are assumed to be in seconds, and are converted into
3048 * jiffies.
3049 *
3050 * Returns 0 on success.
3051 */
3052int proc_dointvec_jiffies(struct ctl_table *table, int write,
3053                          void __user *buffer, size_t *lenp, loff_t *ppos)
3054{
3055    return do_proc_dointvec(table,write,buffer,lenp,ppos,
3056                            do_proc_dointvec_jiffies_conv,NULL);
3057}
3058
3059/**
3060 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3061 * @table: the sysctl table
3062 * @write: %TRUE if this is a write to the sysctl file
3063 * @buffer: the user buffer
3064 * @lenp: the size of the user buffer
3065 * @ppos: pointer to the file position
3066 *
3067 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3068 * values from/to the user buffer, treated as an ASCII string. 
3069 * The values read are assumed to be in 1/USER_HZ seconds, and 
3070 * are converted into jiffies.
3071 *
3072 * Returns 0 on success.
3073 */
3074int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3075                                 void __user *buffer, size_t *lenp, loff_t *ppos)
3076{
3077    return do_proc_dointvec(table,write,buffer,lenp,ppos,
3078                            do_proc_dointvec_userhz_jiffies_conv,NULL);
3079}
3080
3081/**
3082 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3083 * @table: the sysctl table
3084 * @write: %TRUE if this is a write to the sysctl file
3085 * @buffer: the user buffer
3086 * @lenp: the size of the user buffer
3087 * @ppos: file position
3088 * @ppos: the current position in the file
3089 *
3090 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3091 * values from/to the user buffer, treated as an ASCII string. 
3092 * The values read are assumed to be in 1/1000 seconds, and 
3093 * are converted into jiffies.
3094 *
3095 * Returns 0 on success.
3096 */
3097int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3098                             void __user *buffer, size_t *lenp, loff_t *ppos)
3099{
3100        return do_proc_dointvec(table, write, buffer, lenp, ppos,
3101                                do_proc_dointvec_ms_jiffies_conv, NULL);
3102}
3103
3104static int proc_do_cad_pid(struct ctl_table *table, int write,
3105                           void __user *buffer, size_t *lenp, loff_t *ppos)
3106{
3107        struct pid *new_pid;
3108        pid_t tmp;
3109        int r;
3110
3111        tmp = pid_vnr(cad_pid);
3112
3113        r = __do_proc_dointvec(&tmp, table, write, buffer,
3114                               lenp, ppos, NULL, NULL);
3115        if (r || !write)
3116                return r;
3117
3118        new_pid = find_get_pid(tmp);
3119        if (!new_pid)
3120                return -ESRCH;
3121
3122        put_pid(xchg(&cad_pid, new_pid));
3123        return 0;
3124}
3125
3126/**
3127 * proc_do_large_bitmap - read/write from/to a large bitmap
3128 * @table: the sysctl table
3129 * @write: %TRUE if this is a write to the sysctl file
3130 * @buffer: the user buffer
3131 * @lenp: the size of the user buffer
3132 * @ppos: file position
3133 *
3134 * The bitmap is stored at table->data and the bitmap length (in bits)
3135 * in table->maxlen.
3136 *
3137 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3138 * large bitmaps may be represented in a compact manner. Writing into
3139 * the file will clear the bitmap then update it with the given input.
3140 *
3141 * Returns 0 on success.
3142 */
3143int proc_do_large_bitmap(struct ctl_table *table, int write,
3144                         void __user *buffer, size_t *lenp, loff_t *ppos)
3145{
3146        int err = 0;
3147        bool first = 1;
3148        size_t left = *lenp;
3149        unsigned long bitmap_len = table->maxlen;
3150        unsigned long *bitmap = *(unsigned long **) table->data;
3151        unsigned long *tmp_bitmap = NULL;
3152        char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3153
3154        if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3155                *lenp = 0;
3156                return 0;
3157        }
3158
3159        if (write) {
3160                char *kbuf, *p;
3161
3162                if (left > PAGE_SIZE - 1)
3163                        left = PAGE_SIZE - 1;
3164
3165                p = kbuf = memdup_user_nul(buffer, left);
3166                if (IS_ERR(kbuf))
3167                        return PTR_ERR(kbuf);
3168
3169                tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3170                                     sizeof(unsigned long),
3171                                     GFP_KERNEL);
3172                if (!tmp_bitmap) {
3173                        kfree(kbuf);
3174                        return -ENOMEM;
3175                }
3176                proc_skip_char(&p, &left, '\n');
3177                while (!err && left) {
3178                        unsigned long val_a, val_b;
3179                        bool neg;
3180
3181                        err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3182                                             sizeof(tr_a), &c);
3183                        if (err)
3184                                break;
3185                        if (val_a >= bitmap_len || neg) {
3186                                err = -EINVAL;
3187                                break;
3188                        }
3189
3190                        val_b = val_a;
3191                        if (left) {
3192                                p++;
3193                                left--;
3194                        }
3195
3196                        if (c == '-') {
3197                                err = proc_get_long(&p, &left, &val_b,
3198                                                     &neg, tr_b, sizeof(tr_b),
3199                                                     &c);
3200                                if (err)
3201                                        break;
3202                                if (val_b >= bitmap_len || neg ||
3203                                    val_a > val_b) {
3204                                        err = -EINVAL;
3205                                        break;
3206                                }
3207                                if (left) {
3208                                        p++;
3209                                        left--;
3210                                }
3211                        }
3212
3213                        bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3214                        first = 0;
3215                        proc_skip_char(&p, &left, '\n');
3216                }
3217                kfree(kbuf);
3218        } else {
3219                unsigned long bit_a, bit_b = 0;
3220
3221                while (left) {
3222                        bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3223                        if (bit_a >= bitmap_len)
3224                                break;
3225                        bit_b = find_next_zero_bit(bitmap, bitmap_len,
3226                                                   bit_a + 1) - 1;
3227
3228                        if (!first) {
3229                                err = proc_put_char(&buffer, &left, ',');
3230                                if (err)
3231                                        break;
3232                        }
3233                        err = proc_put_long(&buffer, &left, bit_a, false);
3234                        if (err)
3235                                break;
3236                        if (bit_a != bit_b) {
3237                                err = proc_put_char(&buffer, &left, '-');
3238                                if (err)
3239                                        break;
3240                                err = proc_put_long(&buffer, &left, bit_b, false);
3241                                if (err)
3242                                        break;
3243                        }
3244
3245                        first = 0; bit_b++;
3246                }
3247                if (!err)
3248                        err = proc_put_char(&buffer, &left, '\n');
3249        }
3250
3251        if (!err) {
3252                if (write) {
3253                        if (*ppos)
3254                                bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3255                        else
3256                                bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3257                }
3258                *lenp -= left;
3259                *ppos += *lenp;
3260        }
3261
3262        kfree(tmp_bitmap);
3263        return err;
3264}
3265
3266#else /* CONFIG_PROC_SYSCTL */
3267
3268int proc_dostring(struct ctl_table *table, int write,
3269                  void __user *buffer, size_t *lenp, loff_t *ppos)
3270{
3271        return -ENOSYS;
3272}
3273
3274int proc_dointvec(struct ctl_table *table, int write,
3275                  void __user *buffer, size_t *lenp, loff_t *ppos)
3276{
3277        return -ENOSYS;
3278}
3279
3280int proc_douintvec(struct ctl_table *table, int write,
3281                  void __user *buffer, size_t *lenp, loff_t *ppos)
3282{
3283        return -ENOSYS;
3284}
3285
3286int proc_dointvec_minmax(struct ctl_table *table, int write,
3287                    void __user *buffer, size_t *lenp, loff_t *ppos)
3288{
3289        return -ENOSYS;
3290}
3291
3292int proc_douintvec_minmax(struct ctl_table *table, int write,
3293                          void __user *buffer, size_t *lenp, loff_t *ppos)
3294{
3295        return -ENOSYS;
3296}
3297
3298int proc_dointvec_jiffies(struct ctl_table *table, int write,
3299                    void __user *buffer, size_t *lenp, loff_t *ppos)
3300{
3301        return -ENOSYS;
3302}
3303
3304int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3305                    void __user *buffer, size_t *lenp, loff_t *ppos)
3306{
3307        return -ENOSYS;
3308}
3309
3310int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3311                             void __user *buffer, size_t *lenp, loff_t *ppos)
3312{
3313        return -ENOSYS;
3314}
3315
3316int proc_doulongvec_minmax(struct ctl_table *table, int write,
3317                    void __user *buffer, size_t *lenp, loff_t *ppos)
3318{
3319        return -ENOSYS;
3320}
3321
3322int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3323                                      void __user *buffer,
3324                                      size_t *lenp, loff_t *ppos)
3325{
3326    return -ENOSYS;
3327}
3328
3329
3330#endif /* CONFIG_PROC_SYSCTL */
3331
3332#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
3333static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
3334                                          void __user *buffer, size_t *lenp,
3335                                          loff_t *ppos)
3336{
3337        int ret, bpf_stats = *(int *)table->data;
3338        struct ctl_table tmp = *table;
3339
3340        if (write && !capable(CAP_SYS_ADMIN))
3341                return -EPERM;
3342
3343        tmp.data = &bpf_stats;
3344        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3345        if (write && !ret) {
3346                *(int *)table->data = bpf_stats;
3347                if (bpf_stats)
3348                        static_branch_enable(&bpf_stats_enabled_key);
3349                else
3350                        static_branch_disable(&bpf_stats_enabled_key);
3351        }
3352        return ret;
3353}
3354#endif
/*
 * Every handler exported below is defined twice in this file (once per
 * CONFIG_PROC_SYSCTL branch), so the exports are gathered here in one
 * place rather than following each definition -- exception granted :-)
 */

/* string handler */
EXPORT_SYMBOL(proc_dostring);

/* int / unsigned int vector handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);

/* unsigned long vector handlers */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3369