linux/kernel/sysctl.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * sysctl.c: General linux system control interface
   4 *
   5 * Begun 24 March 1995, Stephen Tweedie
   6 * Added /proc support, Dec 1995
   7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
  10 * Dynamic registration fixes, Stephen Tweedie.
  11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  13 *  Horn.
  14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  17 *  Wendling.
  18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  19 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  20 */
  21
  22#include <linux/module.h>
  23#include <linux/aio.h>
  24#include <linux/mm.h>
  25#include <linux/swap.h>
  26#include <linux/slab.h>
  27#include <linux/sysctl.h>
  28#include <linux/bitmap.h>
  29#include <linux/signal.h>
  30#include <linux/printk.h>
  31#include <linux/proc_fs.h>
  32#include <linux/security.h>
  33#include <linux/ctype.h>
  34#include <linux/kmemleak.h>
  35#include <linux/fs.h>
  36#include <linux/init.h>
  37#include <linux/kernel.h>
  38#include <linux/kobject.h>
  39#include <linux/net.h>
  40#include <linux/sysrq.h>
  41#include <linux/highuid.h>
  42#include <linux/writeback.h>
  43#include <linux/ratelimit.h>
  44#include <linux/compaction.h>
  45#include <linux/hugetlb.h>
  46#include <linux/initrd.h>
  47#include <linux/key.h>
  48#include <linux/times.h>
  49#include <linux/limits.h>
  50#include <linux/dcache.h>
  51#include <linux/dnotify.h>
  52#include <linux/syscalls.h>
  53#include <linux/vmstat.h>
  54#include <linux/nfs_fs.h>
  55#include <linux/acpi.h>
  56#include <linux/reboot.h>
  57#include <linux/ftrace.h>
  58#include <linux/perf_event.h>
  59#include <linux/kprobes.h>
  60#include <linux/pipe_fs_i.h>
  61#include <linux/oom.h>
  62#include <linux/kmod.h>
  63#include <linux/capability.h>
  64#include <linux/binfmts.h>
  65#include <linux/sched/sysctl.h>
  66#include <linux/sched/coredump.h>
  67#include <linux/kexec.h>
  68#include <linux/bpf.h>
  69#include <linux/mount.h>
  70#include <linux/userfaultfd_k.h>
  71
  72#include "../lib/kstrtox.h"
  73
  74#include <linux/uaccess.h>
  75#include <asm/processor.h>
  76
  77#ifdef CONFIG_X86
  78#include <asm/nmi.h>
  79#include <asm/stacktrace.h>
  80#include <asm/io.h>
  81#endif
  82#ifdef CONFIG_SPARC
  83#include <asm/setup.h>
  84#endif
  85#ifdef CONFIG_BSD_PROCESS_ACCT
  86#include <linux/acct.h>
  87#endif
  88#ifdef CONFIG_RT_MUTEXES
  89#include <linux/rtmutex.h>
  90#endif
  91#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
  92#include <linux/lockdep.h>
  93#endif
  94#ifdef CONFIG_CHR_DEV_SG
  95#include <scsi/sg.h>
  96#endif
  97#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
  98#include <linux/stackleak.h>
  99#endif
 100#ifdef CONFIG_LOCKUP_DETECTOR
 101#include <linux/nmi.h>
 102#endif
 103
 104#if defined(CONFIG_SYSCTL)
 105
 106/* External variables not in a header file. */
 107extern int suid_dumpable;
 108#ifdef CONFIG_COREDUMP
 109extern int core_uses_pid;
 110extern char core_pattern[];
 111extern unsigned int core_pipe_limit;
 112#endif
 113extern int pid_max;
 114extern int pid_max_min, pid_max_max;
 115extern int percpu_pagelist_fraction;
 116extern int latencytop_enabled;
 117extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
 118#ifndef CONFIG_MMU
 119extern int sysctl_nr_trim_pages;
 120#endif
 121
  122/* Constants used for minimum and maximum */
 123#ifdef CONFIG_LOCKUP_DETECTOR
 124static int sixty = 60;
 125#endif
 126
 127static int __maybe_unused neg_one = -1;
 128
 129static int zero;
 130static int __maybe_unused one = 1;
 131static int __maybe_unused two = 2;
 132static int __maybe_unused four = 4;
 133static unsigned long zero_ul;
 134static unsigned long one_ul = 1;
 135static unsigned long long_max = LONG_MAX;
 136static int one_hundred = 100;
 137static int one_thousand = 1000;
 138#ifdef CONFIG_PRINTK
 139static int ten_thousand = 10000;
 140#endif
 141#ifdef CONFIG_PERF_EVENTS
 142static int six_hundred_forty_kb = 640 * 1024;
 143#endif
 144
 145/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 146static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 147
 148/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 149static int maxolduid = 65535;
 150static int minolduid;
 151
 152static int ngroups_max = NGROUPS_MAX;
 153static const int cap_last_cap = CAP_LAST_CAP;
 154
 155/*
 156 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 157 * and hung_task_check_interval_secs
 158 */
 159#ifdef CONFIG_DETECT_HUNG_TASK
 160static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 161#endif
 162
 163#ifdef CONFIG_INOTIFY_USER
 164#include <linux/inotify.h>
 165#endif
 166#ifdef CONFIG_SPARC
 167#endif
 168
 169#ifdef __hppa__
 170extern int pwrsw_enabled;
 171#endif
 172
 173#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 174extern int unaligned_enabled;
 175#endif
 176
 177#ifdef CONFIG_IA64
 178extern int unaligned_dump_stack;
 179#endif
 180
 181#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
 182extern int no_unaligned_warning;
 183#endif
 184
 185#ifdef CONFIG_PROC_SYSCTL
 186
 187/**
 188 * enum sysctl_writes_mode - supported sysctl write modes
 189 *
 190 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 191 *      to be written, and multiple writes on the same sysctl file descriptor
 192 *      will rewrite the sysctl value, regardless of file position. No warning
 193 *      is issued when the initial position is not 0.
 194 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 195 *      not 0.
 196 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 197 *      file position 0 and the value must be fully contained in the buffer
 198 *      sent to the write syscall. If dealing with strings respect the file
 199 *      position, but restrict this to the max length of the buffer, anything
 200 *      passed the max lenght will be ignored. Multiple writes will append
 201 *      to the buffer.
 202 *
 203 * These write modes control how current file position affects the behavior of
 204 * updating sysctl values through the proc interface on each write.
 205 */
 206enum sysctl_writes_mode {
 207        SYSCTL_WRITES_LEGACY            = -1,
 208        SYSCTL_WRITES_WARN              = 0,
 209        SYSCTL_WRITES_STRICT            = 1,
 210};
 211
 212static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 213
 214static int proc_do_cad_pid(struct ctl_table *table, int write,
 215                  void __user *buffer, size_t *lenp, loff_t *ppos);
 216static int proc_taint(struct ctl_table *table, int write,
 217                               void __user *buffer, size_t *lenp, loff_t *ppos);
 218#endif
 219
 220#ifdef CONFIG_PRINTK
 221static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 222                                void __user *buffer, size_t *lenp, loff_t *ppos);
 223#endif
 224
 225static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
 226                void __user *buffer, size_t *lenp, loff_t *ppos);
 227#ifdef CONFIG_COREDUMP
 228static int proc_dostring_coredump(struct ctl_table *table, int write,
 229                void __user *buffer, size_t *lenp, loff_t *ppos);
 230#endif
 231static int proc_dopipe_max_size(struct ctl_table *table, int write,
 232                void __user *buffer, size_t *lenp, loff_t *ppos);
 233
 234#ifdef CONFIG_MAGIC_SYSRQ
 235/* Note: sysrq code uses its own private copy */
 236static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 237
 238static int sysrq_sysctl_handler(struct ctl_table *table, int write,
 239                                void __user *buffer, size_t *lenp,
 240                                loff_t *ppos)
 241{
 242        int error;
 243
 244        error = proc_dointvec(table, write, buffer, lenp, ppos);
 245        if (error)
 246                return error;
 247
 248        if (write)
 249                sysrq_toggle_support(__sysrq_enabled);
 250
 251        return 0;
 252}
 253
 254#endif
 255
 256static struct ctl_table kern_table[];
 257static struct ctl_table vm_table[];
 258static struct ctl_table fs_table[];
 259static struct ctl_table debug_table[];
 260static struct ctl_table dev_table[];
 261extern struct ctl_table random_table[];
 262#ifdef CONFIG_EPOLL
 263extern struct ctl_table epoll_table[];
 264#endif
 265
 266#ifdef CONFIG_FW_LOADER_USER_HELPER
 267extern struct ctl_table firmware_config_table[];
 268#endif
 269
 270#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 271int sysctl_legacy_va_layout;
 272#endif
 273
 274/* The default sysctl tables: */
 275
 276static struct ctl_table sysctl_base_table[] = {
 277        {
 278                .procname       = "kernel",
 279                .mode           = 0555,
 280                .child          = kern_table,
 281        },
 282        {
 283                .procname       = "vm",
 284                .mode           = 0555,
 285                .child          = vm_table,
 286        },
 287        {
 288                .procname       = "fs",
 289                .mode           = 0555,
 290                .child          = fs_table,
 291        },
 292        {
 293                .procname       = "debug",
 294                .mode           = 0555,
 295                .child          = debug_table,
 296        },
 297        {
 298                .procname       = "dev",
 299                .mode           = 0555,
 300                .child          = dev_table,
 301        },
 302        { }
 303};
 304
 305#ifdef CONFIG_SCHED_DEBUG
 306static int min_sched_granularity_ns = 100000;           /* 100 usecs */
 307static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
 308static int min_wakeup_granularity_ns;                   /* 0 usecs */
 309static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
 310#ifdef CONFIG_SMP
 311static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 312static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
 313#endif /* CONFIG_SMP */
 314#endif /* CONFIG_SCHED_DEBUG */
 315
 316#ifdef CONFIG_COMPACTION
 317static int min_extfrag_threshold;
 318static int max_extfrag_threshold = 1000;
 319#endif
 320
 321static struct ctl_table kern_table[] = {
 322        {
 323                .procname       = "sched_child_runs_first",
 324                .data           = &sysctl_sched_child_runs_first,
 325                .maxlen         = sizeof(unsigned int),
 326                .mode           = 0644,
 327                .proc_handler   = proc_dointvec,
 328        },
 329#ifdef CONFIG_SCHED_DEBUG
 330        {
 331                .procname       = "sched_min_granularity_ns",
 332                .data           = &sysctl_sched_min_granularity,
 333                .maxlen         = sizeof(unsigned int),
 334                .mode           = 0644,
 335                .proc_handler   = sched_proc_update_handler,
 336                .extra1         = &min_sched_granularity_ns,
 337                .extra2         = &max_sched_granularity_ns,
 338        },
 339        {
 340                .procname       = "sched_latency_ns",
 341                .data           = &sysctl_sched_latency,
 342                .maxlen         = sizeof(unsigned int),
 343                .mode           = 0644,
 344                .proc_handler   = sched_proc_update_handler,
 345                .extra1         = &min_sched_granularity_ns,
 346                .extra2         = &max_sched_granularity_ns,
 347        },
 348        {
 349                .procname       = "sched_wakeup_granularity_ns",
 350                .data           = &sysctl_sched_wakeup_granularity,
 351                .maxlen         = sizeof(unsigned int),
 352                .mode           = 0644,
 353                .proc_handler   = sched_proc_update_handler,
 354                .extra1         = &min_wakeup_granularity_ns,
 355                .extra2         = &max_wakeup_granularity_ns,
 356        },
 357#ifdef CONFIG_SMP
 358        {
 359                .procname       = "sched_tunable_scaling",
 360                .data           = &sysctl_sched_tunable_scaling,
 361                .maxlen         = sizeof(enum sched_tunable_scaling),
 362                .mode           = 0644,
 363                .proc_handler   = sched_proc_update_handler,
 364                .extra1         = &min_sched_tunable_scaling,
 365                .extra2         = &max_sched_tunable_scaling,
 366        },
 367        {
 368                .procname       = "sched_migration_cost_ns",
 369                .data           = &sysctl_sched_migration_cost,
 370                .maxlen         = sizeof(unsigned int),
 371                .mode           = 0644,
 372                .proc_handler   = proc_dointvec,
 373        },
 374        {
 375                .procname       = "sched_nr_migrate",
 376                .data           = &sysctl_sched_nr_migrate,
 377                .maxlen         = sizeof(unsigned int),
 378                .mode           = 0644,
 379                .proc_handler   = proc_dointvec,
 380        },
 381#ifdef CONFIG_SCHEDSTATS
 382        {
 383                .procname       = "sched_schedstats",
 384                .data           = NULL,
 385                .maxlen         = sizeof(unsigned int),
 386                .mode           = 0644,
 387                .proc_handler   = sysctl_schedstats,
 388                .extra1         = &zero,
 389                .extra2         = &one,
 390        },
 391#endif /* CONFIG_SCHEDSTATS */
 392#endif /* CONFIG_SMP */
 393#ifdef CONFIG_NUMA_BALANCING
 394        {
 395                .procname       = "numa_balancing_scan_delay_ms",
 396                .data           = &sysctl_numa_balancing_scan_delay,
 397                .maxlen         = sizeof(unsigned int),
 398                .mode           = 0644,
 399                .proc_handler   = proc_dointvec,
 400        },
 401        {
 402                .procname       = "numa_balancing_scan_period_min_ms",
 403                .data           = &sysctl_numa_balancing_scan_period_min,
 404                .maxlen         = sizeof(unsigned int),
 405                .mode           = 0644,
 406                .proc_handler   = proc_dointvec,
 407        },
 408        {
 409                .procname       = "numa_balancing_scan_period_max_ms",
 410                .data           = &sysctl_numa_balancing_scan_period_max,
 411                .maxlen         = sizeof(unsigned int),
 412                .mode           = 0644,
 413                .proc_handler   = proc_dointvec,
 414        },
 415        {
 416                .procname       = "numa_balancing_scan_size_mb",
 417                .data           = &sysctl_numa_balancing_scan_size,
 418                .maxlen         = sizeof(unsigned int),
 419                .mode           = 0644,
 420                .proc_handler   = proc_dointvec_minmax,
 421                .extra1         = &one,
 422        },
 423        {
 424                .procname       = "numa_balancing",
 425                .data           = NULL, /* filled in by handler */
 426                .maxlen         = sizeof(unsigned int),
 427                .mode           = 0644,
 428                .proc_handler   = sysctl_numa_balancing,
 429                .extra1         = &zero,
 430                .extra2         = &one,
 431        },
 432#endif /* CONFIG_NUMA_BALANCING */
 433#endif /* CONFIG_SCHED_DEBUG */
 434        {
 435                .procname       = "sched_rt_period_us",
 436                .data           = &sysctl_sched_rt_period,
 437                .maxlen         = sizeof(unsigned int),
 438                .mode           = 0644,
 439                .proc_handler   = sched_rt_handler,
 440        },
 441        {
 442                .procname       = "sched_rt_runtime_us",
 443                .data           = &sysctl_sched_rt_runtime,
 444                .maxlen         = sizeof(int),
 445                .mode           = 0644,
 446                .proc_handler   = sched_rt_handler,
 447        },
 448        {
 449                .procname       = "sched_rr_timeslice_ms",
 450                .data           = &sysctl_sched_rr_timeslice,
 451                .maxlen         = sizeof(int),
 452                .mode           = 0644,
 453                .proc_handler   = sched_rr_handler,
 454        },
 455#ifdef CONFIG_SCHED_AUTOGROUP
 456        {
 457                .procname       = "sched_autogroup_enabled",
 458                .data           = &sysctl_sched_autogroup_enabled,
 459                .maxlen         = sizeof(unsigned int),
 460                .mode           = 0644,
 461                .proc_handler   = proc_dointvec_minmax,
 462                .extra1         = &zero,
 463                .extra2         = &one,
 464        },
 465#endif
 466#ifdef CONFIG_CFS_BANDWIDTH
 467        {
 468                .procname       = "sched_cfs_bandwidth_slice_us",
 469                .data           = &sysctl_sched_cfs_bandwidth_slice,
 470                .maxlen         = sizeof(unsigned int),
 471                .mode           = 0644,
 472                .proc_handler   = proc_dointvec_minmax,
 473                .extra1         = &one,
 474        },
 475#endif
 476#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 477        {
 478                .procname       = "sched_energy_aware",
 479                .data           = &sysctl_sched_energy_aware,
 480                .maxlen         = sizeof(unsigned int),
 481                .mode           = 0644,
 482                .proc_handler   = sched_energy_aware_handler,
 483                .extra1         = &zero,
 484                .extra2         = &one,
 485        },
 486#endif
 487#ifdef CONFIG_PROVE_LOCKING
 488        {
 489                .procname       = "prove_locking",
 490                .data           = &prove_locking,
 491                .maxlen         = sizeof(int),
 492                .mode           = 0644,
 493                .proc_handler   = proc_dointvec,
 494        },
 495#endif
 496#ifdef CONFIG_LOCK_STAT
 497        {
 498                .procname       = "lock_stat",
 499                .data           = &lock_stat,
 500                .maxlen         = sizeof(int),
 501                .mode           = 0644,
 502                .proc_handler   = proc_dointvec,
 503        },
 504#endif
 505        {
 506                .procname       = "panic",
 507                .data           = &panic_timeout,
 508                .maxlen         = sizeof(int),
 509                .mode           = 0644,
 510                .proc_handler   = proc_dointvec,
 511        },
 512#ifdef CONFIG_COREDUMP
 513        {
 514                .procname       = "core_uses_pid",
 515                .data           = &core_uses_pid,
 516                .maxlen         = sizeof(int),
 517                .mode           = 0644,
 518                .proc_handler   = proc_dointvec,
 519        },
 520        {
 521                .procname       = "core_pattern",
 522                .data           = core_pattern,
 523                .maxlen         = CORENAME_MAX_SIZE,
 524                .mode           = 0644,
 525                .proc_handler   = proc_dostring_coredump,
 526        },
 527        {
 528                .procname       = "core_pipe_limit",
 529                .data           = &core_pipe_limit,
 530                .maxlen         = sizeof(unsigned int),
 531                .mode           = 0644,
 532                .proc_handler   = proc_dointvec,
 533        },
 534#endif
 535#ifdef CONFIG_PROC_SYSCTL
 536        {
 537                .procname       = "tainted",
 538                .maxlen         = sizeof(long),
 539                .mode           = 0644,
 540                .proc_handler   = proc_taint,
 541        },
 542        {
 543                .procname       = "sysctl_writes_strict",
 544                .data           = &sysctl_writes_strict,
 545                .maxlen         = sizeof(int),
 546                .mode           = 0644,
 547                .proc_handler   = proc_dointvec_minmax,
 548                .extra1         = &neg_one,
 549                .extra2         = &one,
 550        },
 551#endif
 552#ifdef CONFIG_LATENCYTOP
 553        {
 554                .procname       = "latencytop",
 555                .data           = &latencytop_enabled,
 556                .maxlen         = sizeof(int),
 557                .mode           = 0644,
 558                .proc_handler   = sysctl_latencytop,
 559        },
 560#endif
 561#ifdef CONFIG_BLK_DEV_INITRD
 562        {
 563                .procname       = "real-root-dev",
 564                .data           = &real_root_dev,
 565                .maxlen         = sizeof(int),
 566                .mode           = 0644,
 567                .proc_handler   = proc_dointvec,
 568        },
 569#endif
 570        {
 571                .procname       = "print-fatal-signals",
 572                .data           = &print_fatal_signals,
 573                .maxlen         = sizeof(int),
 574                .mode           = 0644,
 575                .proc_handler   = proc_dointvec,
 576        },
 577#ifdef CONFIG_SPARC
 578        {
 579                .procname       = "reboot-cmd",
 580                .data           = reboot_command,
 581                .maxlen         = 256,
 582                .mode           = 0644,
 583                .proc_handler   = proc_dostring,
 584        },
 585        {
 586                .procname       = "stop-a",
 587                .data           = &stop_a_enabled,
 588                .maxlen         = sizeof (int),
 589                .mode           = 0644,
 590                .proc_handler   = proc_dointvec,
 591        },
 592        {
 593                .procname       = "scons-poweroff",
 594                .data           = &scons_pwroff,
 595                .maxlen         = sizeof (int),
 596                .mode           = 0644,
 597                .proc_handler   = proc_dointvec,
 598        },
 599#endif
 600#ifdef CONFIG_SPARC64
 601        {
 602                .procname       = "tsb-ratio",
 603                .data           = &sysctl_tsb_ratio,
 604                .maxlen         = sizeof (int),
 605                .mode           = 0644,
 606                .proc_handler   = proc_dointvec,
 607        },
 608#endif
 609#ifdef __hppa__
 610        {
 611                .procname       = "soft-power",
 612                .data           = &pwrsw_enabled,
 613                .maxlen         = sizeof (int),
 614                .mode           = 0644,
 615                .proc_handler   = proc_dointvec,
 616        },
 617#endif
 618#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 619        {
 620                .procname       = "unaligned-trap",
 621                .data           = &unaligned_enabled,
 622                .maxlen         = sizeof (int),
 623                .mode           = 0644,
 624                .proc_handler   = proc_dointvec,
 625        },
 626#endif
 627        {
 628                .procname       = "ctrl-alt-del",
 629                .data           = &C_A_D,
 630                .maxlen         = sizeof(int),
 631                .mode           = 0644,
 632                .proc_handler   = proc_dointvec,
 633        },
 634#ifdef CONFIG_FUNCTION_TRACER
 635        {
 636                .procname       = "ftrace_enabled",
 637                .data           = &ftrace_enabled,
 638                .maxlen         = sizeof(int),
 639                .mode           = 0644,
 640                .proc_handler   = ftrace_enable_sysctl,
 641        },
 642#endif
 643#ifdef CONFIG_STACK_TRACER
 644        {
 645                .procname       = "stack_tracer_enabled",
 646                .data           = &stack_tracer_enabled,
 647                .maxlen         = sizeof(int),
 648                .mode           = 0644,
 649                .proc_handler   = stack_trace_sysctl,
 650        },
 651#endif
 652#ifdef CONFIG_TRACING
 653        {
 654                .procname       = "ftrace_dump_on_oops",
 655                .data           = &ftrace_dump_on_oops,
 656                .maxlen         = sizeof(int),
 657                .mode           = 0644,
 658                .proc_handler   = proc_dointvec,
 659        },
 660        {
 661                .procname       = "traceoff_on_warning",
 662                .data           = &__disable_trace_on_warning,
 663                .maxlen         = sizeof(__disable_trace_on_warning),
 664                .mode           = 0644,
 665                .proc_handler   = proc_dointvec,
 666        },
 667        {
 668                .procname       = "tracepoint_printk",
 669                .data           = &tracepoint_printk,
 670                .maxlen         = sizeof(tracepoint_printk),
 671                .mode           = 0644,
 672                .proc_handler   = tracepoint_printk_sysctl,
 673        },
 674#endif
 675#ifdef CONFIG_KEXEC_CORE
 676        {
 677                .procname       = "kexec_load_disabled",
 678                .data           = &kexec_load_disabled,
 679                .maxlen         = sizeof(int),
 680                .mode           = 0644,
 681                /* only handle a transition from default "0" to "1" */
 682                .proc_handler   = proc_dointvec_minmax,
 683                .extra1         = &one,
 684                .extra2         = &one,
 685        },
 686#endif
 687#ifdef CONFIG_MODULES
 688        {
 689                .procname       = "modprobe",
 690                .data           = &modprobe_path,
 691                .maxlen         = KMOD_PATH_LEN,
 692                .mode           = 0644,
 693                .proc_handler   = proc_dostring,
 694        },
 695        {
 696                .procname       = "modules_disabled",
 697                .data           = &modules_disabled,
 698                .maxlen         = sizeof(int),
 699                .mode           = 0644,
 700                /* only handle a transition from default "0" to "1" */
 701                .proc_handler   = proc_dointvec_minmax,
 702                .extra1         = &one,
 703                .extra2         = &one,
 704        },
 705#endif
 706#ifdef CONFIG_UEVENT_HELPER
 707        {
 708                .procname       = "hotplug",
 709                .data           = &uevent_helper,
 710                .maxlen         = UEVENT_HELPER_PATH_LEN,
 711                .mode           = 0644,
 712                .proc_handler   = proc_dostring,
 713        },
 714#endif
 715#ifdef CONFIG_CHR_DEV_SG
 716        {
 717                .procname       = "sg-big-buff",
 718                .data           = &sg_big_buff,
 719                .maxlen         = sizeof (int),
 720                .mode           = 0444,
 721                .proc_handler   = proc_dointvec,
 722        },
 723#endif
 724#ifdef CONFIG_BSD_PROCESS_ACCT
 725        {
 726                .procname       = "acct",
 727                .data           = &acct_parm,
 728                .maxlen         = 3*sizeof(int),
 729                .mode           = 0644,
 730                .proc_handler   = proc_dointvec,
 731        },
 732#endif
 733#ifdef CONFIG_MAGIC_SYSRQ
 734        {
 735                .procname       = "sysrq",
 736                .data           = &__sysrq_enabled,
 737                .maxlen         = sizeof (int),
 738                .mode           = 0644,
 739                .proc_handler   = sysrq_sysctl_handler,
 740        },
 741#endif
 742#ifdef CONFIG_PROC_SYSCTL
 743        {
 744                .procname       = "cad_pid",
 745                .data           = NULL,
 746                .maxlen         = sizeof (int),
 747                .mode           = 0600,
 748                .proc_handler   = proc_do_cad_pid,
 749        },
 750#endif
 751        {
 752                .procname       = "threads-max",
 753                .data           = NULL,
 754                .maxlen         = sizeof(int),
 755                .mode           = 0644,
 756                .proc_handler   = sysctl_max_threads,
 757        },
 758        {
 759                .procname       = "random",
 760                .mode           = 0555,
 761                .child          = random_table,
 762        },
 763        {
 764                .procname       = "usermodehelper",
 765                .mode           = 0555,
 766                .child          = usermodehelper_table,
 767        },
 768#ifdef CONFIG_FW_LOADER_USER_HELPER
 769        {
 770                .procname       = "firmware_config",
 771                .mode           = 0555,
 772                .child          = firmware_config_table,
 773        },
 774#endif
 775        {
 776                .procname       = "overflowuid",
 777                .data           = &overflowuid,
 778                .maxlen         = sizeof(int),
 779                .mode           = 0644,
 780                .proc_handler   = proc_dointvec_minmax,
 781                .extra1         = &minolduid,
 782                .extra2         = &maxolduid,
 783        },
 784        {
 785                .procname       = "overflowgid",
 786                .data           = &overflowgid,
 787                .maxlen         = sizeof(int),
 788                .mode           = 0644,
 789                .proc_handler   = proc_dointvec_minmax,
 790                .extra1         = &minolduid,
 791                .extra2         = &maxolduid,
 792        },
 793#ifdef CONFIG_S390
 794#ifdef CONFIG_MATHEMU
 795        {
 796                .procname       = "ieee_emulation_warnings",
 797                .data           = &sysctl_ieee_emulation_warnings,
 798                .maxlen         = sizeof(int),
 799                .mode           = 0644,
 800                .proc_handler   = proc_dointvec,
 801        },
 802#endif
 803        {
 804                .procname       = "userprocess_debug",
 805                .data           = &show_unhandled_signals,
 806                .maxlen         = sizeof(int),
 807                .mode           = 0644,
 808                .proc_handler   = proc_dointvec,
 809        },
 810#endif
 811        {
 812                .procname       = "pid_max",
 813                .data           = &pid_max,
 814                .maxlen         = sizeof (int),
 815                .mode           = 0644,
 816                .proc_handler   = proc_dointvec_minmax,
 817                .extra1         = &pid_max_min,
 818                .extra2         = &pid_max_max,
 819        },
 820        {
 821                .procname       = "panic_on_oops",
 822                .data           = &panic_on_oops,
 823                .maxlen         = sizeof(int),
 824                .mode           = 0644,
 825                .proc_handler   = proc_dointvec,
 826        },
 827        {
 828                .procname       = "panic_print",
 829                .data           = &panic_print,
 830                .maxlen         = sizeof(unsigned long),
 831                .mode           = 0644,
 832                .proc_handler   = proc_doulongvec_minmax,
 833        },
 834#if defined CONFIG_PRINTK
 835        {
 836                .procname       = "printk",
 837                .data           = &console_loglevel,
 838                .maxlen         = 4*sizeof(int),
 839                .mode           = 0644,
 840                .proc_handler   = proc_dointvec,
 841        },
 842        {
 843                .procname       = "printk_ratelimit",
 844                .data           = &printk_ratelimit_state.interval,
 845                .maxlen         = sizeof(int),
 846                .mode           = 0644,
 847                .proc_handler   = proc_dointvec_jiffies,
 848        },
 849        {
 850                .procname       = "printk_ratelimit_burst",
 851                .data           = &printk_ratelimit_state.burst,
 852                .maxlen         = sizeof(int),
 853                .mode           = 0644,
 854                .proc_handler   = proc_dointvec,
 855        },
 856        {
 857                .procname       = "printk_delay",
 858                .data           = &printk_delay_msec,
 859                .maxlen         = sizeof(int),
 860                .mode           = 0644,
 861                .proc_handler   = proc_dointvec_minmax,
 862                .extra1         = &zero,
 863                .extra2         = &ten_thousand,
 864        },
 865        {
 866                .procname       = "printk_devkmsg",
 867                .data           = devkmsg_log_str,
 868                .maxlen         = DEVKMSG_STR_MAX_SIZE,
 869                .mode           = 0644,
 870                .proc_handler   = devkmsg_sysctl_set_loglvl,
 871        },
 872        {
 873                .procname       = "dmesg_restrict",
 874                .data           = &dmesg_restrict,
 875                .maxlen         = sizeof(int),
 876                .mode           = 0644,
 877                .proc_handler   = proc_dointvec_minmax_sysadmin,
 878                .extra1         = &zero,
 879                .extra2         = &one,
 880        },
 881        {
 882                .procname       = "kptr_restrict",
 883                .data           = &kptr_restrict,
 884                .maxlen         = sizeof(int),
 885                .mode           = 0644,
 886                .proc_handler   = proc_dointvec_minmax_sysadmin,
 887                .extra1         = &zero,
 888                .extra2         = &two,
 889        },
 890#endif
 891        {
 892                .procname       = "ngroups_max",
 893                .data           = &ngroups_max,
 894                .maxlen         = sizeof (int),
 895                .mode           = 0444,
 896                .proc_handler   = proc_dointvec,
 897        },
 898        {
 899                .procname       = "cap_last_cap",
 900                .data           = (void *)&cap_last_cap,
 901                .maxlen         = sizeof(int),
 902                .mode           = 0444,
 903                .proc_handler   = proc_dointvec,
 904        },
 905#if defined(CONFIG_LOCKUP_DETECTOR)
 906        {
 907                .procname       = "watchdog",
 908                .data           = &watchdog_user_enabled,
 909                .maxlen         = sizeof(int),
 910                .mode           = 0644,
 911                .proc_handler   = proc_watchdog,
 912                .extra1         = &zero,
 913                .extra2         = &one,
 914        },
 915        {
 916                .procname       = "watchdog_thresh",
 917                .data           = &watchdog_thresh,
 918                .maxlen         = sizeof(int),
 919                .mode           = 0644,
 920                .proc_handler   = proc_watchdog_thresh,
 921                .extra1         = &zero,
 922                .extra2         = &sixty,
 923        },
 924        {
 925                .procname       = "nmi_watchdog",
 926                .data           = &nmi_watchdog_user_enabled,
 927                .maxlen         = sizeof(int),
 928                .mode           = NMI_WATCHDOG_SYSCTL_PERM,
 929                .proc_handler   = proc_nmi_watchdog,
 930                .extra1         = &zero,
 931                .extra2         = &one,
 932        },
 933        {
 934                .procname       = "watchdog_cpumask",
 935                .data           = &watchdog_cpumask_bits,
 936                .maxlen         = NR_CPUS,
 937                .mode           = 0644,
 938                .proc_handler   = proc_watchdog_cpumask,
 939        },
 940#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 941        {
 942                .procname       = "soft_watchdog",
 943                .data           = &soft_watchdog_user_enabled,
 944                .maxlen         = sizeof(int),
 945                .mode           = 0644,
 946                .proc_handler   = proc_soft_watchdog,
 947                .extra1         = &zero,
 948                .extra2         = &one,
 949        },
 950        {
 951                .procname       = "softlockup_panic",
 952                .data           = &softlockup_panic,
 953                .maxlen         = sizeof(int),
 954                .mode           = 0644,
 955                .proc_handler   = proc_dointvec_minmax,
 956                .extra1         = &zero,
 957                .extra2         = &one,
 958        },
 959#ifdef CONFIG_SMP
 960        {
 961                .procname       = "softlockup_all_cpu_backtrace",
 962                .data           = &sysctl_softlockup_all_cpu_backtrace,
 963                .maxlen         = sizeof(int),
 964                .mode           = 0644,
 965                .proc_handler   = proc_dointvec_minmax,
 966                .extra1         = &zero,
 967                .extra2         = &one,
 968        },
 969#endif /* CONFIG_SMP */
 970#endif
 971#ifdef CONFIG_HARDLOCKUP_DETECTOR
 972        {
 973                .procname       = "hardlockup_panic",
 974                .data           = &hardlockup_panic,
 975                .maxlen         = sizeof(int),
 976                .mode           = 0644,
 977                .proc_handler   = proc_dointvec_minmax,
 978                .extra1         = &zero,
 979                .extra2         = &one,
 980        },
 981#ifdef CONFIG_SMP
 982        {
 983                .procname       = "hardlockup_all_cpu_backtrace",
 984                .data           = &sysctl_hardlockup_all_cpu_backtrace,
 985                .maxlen         = sizeof(int),
 986                .mode           = 0644,
 987                .proc_handler   = proc_dointvec_minmax,
 988                .extra1         = &zero,
 989                .extra2         = &one,
 990        },
 991#endif /* CONFIG_SMP */
 992#endif
 993#endif
 994
 995#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 996        {
 997                .procname       = "unknown_nmi_panic",
 998                .data           = &unknown_nmi_panic,
 999                .maxlen         = sizeof (int),
1000                .mode           = 0644,
1001                .proc_handler   = proc_dointvec,
1002        },
1003#endif
1004#if defined(CONFIG_X86)
1005        {
1006                .procname       = "panic_on_unrecovered_nmi",
1007                .data           = &panic_on_unrecovered_nmi,
1008                .maxlen         = sizeof(int),
1009                .mode           = 0644,
1010                .proc_handler   = proc_dointvec,
1011        },
1012        {
1013                .procname       = "panic_on_io_nmi",
1014                .data           = &panic_on_io_nmi,
1015                .maxlen         = sizeof(int),
1016                .mode           = 0644,
1017                .proc_handler   = proc_dointvec,
1018        },
1019#ifdef CONFIG_DEBUG_STACKOVERFLOW
1020        {
1021                .procname       = "panic_on_stackoverflow",
1022                .data           = &sysctl_panic_on_stackoverflow,
1023                .maxlen         = sizeof(int),
1024                .mode           = 0644,
1025                .proc_handler   = proc_dointvec,
1026        },
1027#endif
1028        {
1029                .procname       = "bootloader_type",
1030                .data           = &bootloader_type,
1031                .maxlen         = sizeof (int),
1032                .mode           = 0444,
1033                .proc_handler   = proc_dointvec,
1034        },
1035        {
1036                .procname       = "bootloader_version",
1037                .data           = &bootloader_version,
1038                .maxlen         = sizeof (int),
1039                .mode           = 0444,
1040                .proc_handler   = proc_dointvec,
1041        },
1042        {
1043                .procname       = "io_delay_type",
1044                .data           = &io_delay_type,
1045                .maxlen         = sizeof(int),
1046                .mode           = 0644,
1047                .proc_handler   = proc_dointvec,
1048        },
1049#endif
1050#if defined(CONFIG_MMU)
1051        {
1052                .procname       = "randomize_va_space",
1053                .data           = &randomize_va_space,
1054                .maxlen         = sizeof(int),
1055                .mode           = 0644,
1056                .proc_handler   = proc_dointvec,
1057        },
1058#endif
1059#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1060        {
1061                .procname       = "spin_retry",
1062                .data           = &spin_retry,
1063                .maxlen         = sizeof (int),
1064                .mode           = 0644,
1065                .proc_handler   = proc_dointvec,
1066        },
1067#endif
1068#if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1069        {
1070                .procname       = "acpi_video_flags",
1071                .data           = &acpi_realmode_flags,
1072                .maxlen         = sizeof (unsigned long),
1073                .mode           = 0644,
1074                .proc_handler   = proc_doulongvec_minmax,
1075        },
1076#endif
1077#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1078        {
1079                .procname       = "ignore-unaligned-usertrap",
1080                .data           = &no_unaligned_warning,
1081                .maxlen         = sizeof (int),
1082                .mode           = 0644,
1083                .proc_handler   = proc_dointvec,
1084        },
1085#endif
1086#ifdef CONFIG_IA64
1087        {
1088                .procname       = "unaligned-dump-stack",
1089                .data           = &unaligned_dump_stack,
1090                .maxlen         = sizeof (int),
1091                .mode           = 0644,
1092                .proc_handler   = proc_dointvec,
1093        },
1094#endif
1095#ifdef CONFIG_DETECT_HUNG_TASK
1096        {
1097                .procname       = "hung_task_panic",
1098                .data           = &sysctl_hung_task_panic,
1099                .maxlen         = sizeof(int),
1100                .mode           = 0644,
1101                .proc_handler   = proc_dointvec_minmax,
1102                .extra1         = &zero,
1103                .extra2         = &one,
1104        },
1105        {
1106                .procname       = "hung_task_check_count",
1107                .data           = &sysctl_hung_task_check_count,
1108                .maxlen         = sizeof(int),
1109                .mode           = 0644,
1110                .proc_handler   = proc_dointvec_minmax,
1111                .extra1         = &zero,
1112        },
1113        {
1114                .procname       = "hung_task_timeout_secs",
1115                .data           = &sysctl_hung_task_timeout_secs,
1116                .maxlen         = sizeof(unsigned long),
1117                .mode           = 0644,
1118                .proc_handler   = proc_dohung_task_timeout_secs,
1119                .extra2         = &hung_task_timeout_max,
1120        },
1121        {
1122                .procname       = "hung_task_check_interval_secs",
1123                .data           = &sysctl_hung_task_check_interval_secs,
1124                .maxlen         = sizeof(unsigned long),
1125                .mode           = 0644,
1126                .proc_handler   = proc_dohung_task_timeout_secs,
1127                .extra2         = &hung_task_timeout_max,
1128        },
1129        {
1130                .procname       = "hung_task_warnings",
1131                .data           = &sysctl_hung_task_warnings,
1132                .maxlen         = sizeof(int),
1133                .mode           = 0644,
1134                .proc_handler   = proc_dointvec_minmax,
1135                .extra1         = &neg_one,
1136        },
1137#endif
1138#ifdef CONFIG_RT_MUTEXES
1139        {
1140                .procname       = "max_lock_depth",
1141                .data           = &max_lock_depth,
1142                .maxlen         = sizeof(int),
1143                .mode           = 0644,
1144                .proc_handler   = proc_dointvec,
1145        },
1146#endif
1147        {
1148                .procname       = "poweroff_cmd",
1149                .data           = &poweroff_cmd,
1150                .maxlen         = POWEROFF_CMD_PATH_LEN,
1151                .mode           = 0644,
1152                .proc_handler   = proc_dostring,
1153        },
1154#ifdef CONFIG_KEYS
1155        {
1156                .procname       = "keys",
1157                .mode           = 0555,
1158                .child          = key_sysctls,
1159        },
1160#endif
1161#ifdef CONFIG_PERF_EVENTS
1162        /*
1163         * User-space scripts rely on the existence of this file
1164         * as a feature check for perf_events being enabled.
1165         *
1166         * So it's an ABI, do not remove!
1167         */
1168        {
1169                .procname       = "perf_event_paranoid",
1170                .data           = &sysctl_perf_event_paranoid,
1171                .maxlen         = sizeof(sysctl_perf_event_paranoid),
1172                .mode           = 0644,
1173                .proc_handler   = proc_dointvec,
1174        },
1175        {
1176                .procname       = "perf_event_mlock_kb",
1177                .data           = &sysctl_perf_event_mlock,
1178                .maxlen         = sizeof(sysctl_perf_event_mlock),
1179                .mode           = 0644,
1180                .proc_handler   = proc_dointvec,
1181        },
1182        {
1183                .procname       = "perf_event_max_sample_rate",
1184                .data           = &sysctl_perf_event_sample_rate,
1185                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1186                .mode           = 0644,
1187                .proc_handler   = perf_proc_update_handler,
1188                .extra1         = &one,
1189        },
1190        {
1191                .procname       = "perf_cpu_time_max_percent",
1192                .data           = &sysctl_perf_cpu_time_max_percent,
1193                .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1194                .mode           = 0644,
1195                .proc_handler   = perf_cpu_time_max_percent_handler,
1196                .extra1         = &zero,
1197                .extra2         = &one_hundred,
1198        },
1199        {
1200                .procname       = "perf_event_max_stack",
1201                .data           = &sysctl_perf_event_max_stack,
1202                .maxlen         = sizeof(sysctl_perf_event_max_stack),
1203                .mode           = 0644,
1204                .proc_handler   = perf_event_max_stack_handler,
1205                .extra1         = &zero,
1206                .extra2         = &six_hundred_forty_kb,
1207        },
1208        {
1209                .procname       = "perf_event_max_contexts_per_stack",
1210                .data           = &sysctl_perf_event_max_contexts_per_stack,
1211                .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1212                .mode           = 0644,
1213                .proc_handler   = perf_event_max_stack_handler,
1214                .extra1         = &zero,
1215                .extra2         = &one_thousand,
1216        },
1217#endif
1218        {
1219                .procname       = "panic_on_warn",
1220                .data           = &panic_on_warn,
1221                .maxlen         = sizeof(int),
1222                .mode           = 0644,
1223                .proc_handler   = proc_dointvec_minmax,
1224                .extra1         = &zero,
1225                .extra2         = &one,
1226        },
1227#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1228        {
1229                .procname       = "timer_migration",
1230                .data           = &sysctl_timer_migration,
1231                .maxlen         = sizeof(unsigned int),
1232                .mode           = 0644,
1233                .proc_handler   = timer_migration_handler,
1234                .extra1         = &zero,
1235                .extra2         = &one,
1236        },
1237#endif
1238#ifdef CONFIG_BPF_SYSCALL
1239        {
1240                .procname       = "unprivileged_bpf_disabled",
1241                .data           = &sysctl_unprivileged_bpf_disabled,
1242                .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1243                .mode           = 0644,
1244                /* only handle a transition from default "0" to "1" */
1245                .proc_handler   = proc_dointvec_minmax,
1246                .extra1         = &one,
1247                .extra2         = &one,
1248        },
1249        {
1250                .procname       = "bpf_stats_enabled",
1251                .data           = &bpf_stats_enabled_key.key,
1252                .maxlen         = sizeof(bpf_stats_enabled_key),
1253                .mode           = 0644,
1254                .proc_handler   = proc_do_static_key,
1255        },
1256#endif
1257#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1258        {
1259                .procname       = "panic_on_rcu_stall",
1260                .data           = &sysctl_panic_on_rcu_stall,
1261                .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1262                .mode           = 0644,
1263                .proc_handler   = proc_dointvec_minmax,
1264                .extra1         = &zero,
1265                .extra2         = &one,
1266        },
1267#endif
1268#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1269        {
1270                .procname       = "stack_erasing",
1271                .data           = NULL,
1272                .maxlen         = sizeof(int),
1273                .mode           = 0600,
1274                .proc_handler   = stack_erasing_sysctl,
1275                .extra1         = &zero,
1276                .extra2         = &one,
1277        },
1278#endif
1279        { }
1280};
1281
1282static struct ctl_table vm_table[] = {
1283        {
1284                .procname       = "overcommit_memory",
1285                .data           = &sysctl_overcommit_memory,
1286                .maxlen         = sizeof(sysctl_overcommit_memory),
1287                .mode           = 0644,
1288                .proc_handler   = proc_dointvec_minmax,
1289                .extra1         = &zero,
1290                .extra2         = &two,
1291        },
1292        {
1293                .procname       = "panic_on_oom",
1294                .data           = &sysctl_panic_on_oom,
1295                .maxlen         = sizeof(sysctl_panic_on_oom),
1296                .mode           = 0644,
1297                .proc_handler   = proc_dointvec_minmax,
1298                .extra1         = &zero,
1299                .extra2         = &two,
1300        },
1301        {
1302                .procname       = "oom_kill_allocating_task",
1303                .data           = &sysctl_oom_kill_allocating_task,
1304                .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1305                .mode           = 0644,
1306                .proc_handler   = proc_dointvec,
1307        },
1308        {
1309                .procname       = "oom_dump_tasks",
1310                .data           = &sysctl_oom_dump_tasks,
1311                .maxlen         = sizeof(sysctl_oom_dump_tasks),
1312                .mode           = 0644,
1313                .proc_handler   = proc_dointvec,
1314        },
1315        {
1316                .procname       = "overcommit_ratio",
1317                .data           = &sysctl_overcommit_ratio,
1318                .maxlen         = sizeof(sysctl_overcommit_ratio),
1319                .mode           = 0644,
1320                .proc_handler   = overcommit_ratio_handler,
1321        },
1322        {
1323                .procname       = "overcommit_kbytes",
1324                .data           = &sysctl_overcommit_kbytes,
1325                .maxlen         = sizeof(sysctl_overcommit_kbytes),
1326                .mode           = 0644,
1327                .proc_handler   = overcommit_kbytes_handler,
1328        },
1329        {
1330                .procname       = "page-cluster", 
1331                .data           = &page_cluster,
1332                .maxlen         = sizeof(int),
1333                .mode           = 0644,
1334                .proc_handler   = proc_dointvec_minmax,
1335                .extra1         = &zero,
1336        },
1337        {
1338                .procname       = "dirty_background_ratio",
1339                .data           = &dirty_background_ratio,
1340                .maxlen         = sizeof(dirty_background_ratio),
1341                .mode           = 0644,
1342                .proc_handler   = dirty_background_ratio_handler,
1343                .extra1         = &zero,
1344                .extra2         = &one_hundred,
1345        },
1346        {
1347                .procname       = "dirty_background_bytes",
1348                .data           = &dirty_background_bytes,
1349                .maxlen         = sizeof(dirty_background_bytes),
1350                .mode           = 0644,
1351                .proc_handler   = dirty_background_bytes_handler,
1352                .extra1         = &one_ul,
1353        },
1354        {
1355                .procname       = "dirty_ratio",
1356                .data           = &vm_dirty_ratio,
1357                .maxlen         = sizeof(vm_dirty_ratio),
1358                .mode           = 0644,
1359                .proc_handler   = dirty_ratio_handler,
1360                .extra1         = &zero,
1361                .extra2         = &one_hundred,
1362        },
1363        {
1364                .procname       = "dirty_bytes",
1365                .data           = &vm_dirty_bytes,
1366                .maxlen         = sizeof(vm_dirty_bytes),
1367                .mode           = 0644,
1368                .proc_handler   = dirty_bytes_handler,
1369                .extra1         = &dirty_bytes_min,
1370        },
1371        {
1372                .procname       = "dirty_writeback_centisecs",
1373                .data           = &dirty_writeback_interval,
1374                .maxlen         = sizeof(dirty_writeback_interval),
1375                .mode           = 0644,
1376                .proc_handler   = dirty_writeback_centisecs_handler,
1377        },
1378        {
1379                .procname       = "dirty_expire_centisecs",
1380                .data           = &dirty_expire_interval,
1381                .maxlen         = sizeof(dirty_expire_interval),
1382                .mode           = 0644,
1383                .proc_handler   = proc_dointvec_minmax,
1384                .extra1         = &zero,
1385        },
1386        {
1387                .procname       = "dirtytime_expire_seconds",
1388                .data           = &dirtytime_expire_interval,
1389                .maxlen         = sizeof(dirtytime_expire_interval),
1390                .mode           = 0644,
1391                .proc_handler   = dirtytime_interval_handler,
1392                .extra1         = &zero,
1393        },
1394        {
1395                .procname       = "swappiness",
1396                .data           = &vm_swappiness,
1397                .maxlen         = sizeof(vm_swappiness),
1398                .mode           = 0644,
1399                .proc_handler   = proc_dointvec_minmax,
1400                .extra1         = &zero,
1401                .extra2         = &one_hundred,
1402        },
1403#ifdef CONFIG_HUGETLB_PAGE
1404        {
1405                .procname       = "nr_hugepages",
1406                .data           = NULL,
1407                .maxlen         = sizeof(unsigned long),
1408                .mode           = 0644,
1409                .proc_handler   = hugetlb_sysctl_handler,
1410        },
1411#ifdef CONFIG_NUMA
1412        {
1413                .procname       = "nr_hugepages_mempolicy",
1414                .data           = NULL,
1415                .maxlen         = sizeof(unsigned long),
1416                .mode           = 0644,
1417                .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1418        },
1419        {
1420                .procname               = "numa_stat",
1421                .data                   = &sysctl_vm_numa_stat,
1422                .maxlen                 = sizeof(int),
1423                .mode                   = 0644,
1424                .proc_handler   = sysctl_vm_numa_stat_handler,
1425                .extra1                 = &zero,
1426                .extra2                 = &one,
1427        },
1428#endif
1429         {
1430                .procname       = "hugetlb_shm_group",
1431                .data           = &sysctl_hugetlb_shm_group,
1432                .maxlen         = sizeof(gid_t),
1433                .mode           = 0644,
1434                .proc_handler   = proc_dointvec,
1435         },
1436        {
1437                .procname       = "nr_overcommit_hugepages",
1438                .data           = NULL,
1439                .maxlen         = sizeof(unsigned long),
1440                .mode           = 0644,
1441                .proc_handler   = hugetlb_overcommit_handler,
1442        },
1443#endif
1444        {
1445                .procname       = "lowmem_reserve_ratio",
1446                .data           = &sysctl_lowmem_reserve_ratio,
1447                .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1448                .mode           = 0644,
1449                .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1450        },
1451        {
1452                .procname       = "drop_caches",
1453                .data           = &sysctl_drop_caches,
1454                .maxlen         = sizeof(int),
1455                .mode           = 0644,
1456                .proc_handler   = drop_caches_sysctl_handler,
1457                .extra1         = &one,
1458                .extra2         = &four,
1459        },
1460#ifdef CONFIG_COMPACTION
1461        {
1462                .procname       = "compact_memory",
1463                .data           = &sysctl_compact_memory,
1464                .maxlen         = sizeof(int),
1465                .mode           = 0200,
1466                .proc_handler   = sysctl_compaction_handler,
1467        },
1468        {
1469                .procname       = "extfrag_threshold",
1470                .data           = &sysctl_extfrag_threshold,
1471                .maxlen         = sizeof(int),
1472                .mode           = 0644,
1473                .proc_handler   = proc_dointvec_minmax,
1474                .extra1         = &min_extfrag_threshold,
1475                .extra2         = &max_extfrag_threshold,
1476        },
1477        {
1478                .procname       = "compact_unevictable_allowed",
1479                .data           = &sysctl_compact_unevictable_allowed,
1480                .maxlen         = sizeof(int),
1481                .mode           = 0644,
1482                .proc_handler   = proc_dointvec,
1483                .extra1         = &zero,
1484                .extra2         = &one,
1485        },
1486
1487#endif /* CONFIG_COMPACTION */
1488        {
1489                .procname       = "min_free_kbytes",
1490                .data           = &min_free_kbytes,
1491                .maxlen         = sizeof(min_free_kbytes),
1492                .mode           = 0644,
1493                .proc_handler   = min_free_kbytes_sysctl_handler,
1494                .extra1         = &zero,
1495        },
1496        {
1497                .procname       = "watermark_boost_factor",
1498                .data           = &watermark_boost_factor,
1499                .maxlen         = sizeof(watermark_boost_factor),
1500                .mode           = 0644,
1501                .proc_handler   = watermark_boost_factor_sysctl_handler,
1502                .extra1         = &zero,
1503        },
1504        {
1505                .procname       = "watermark_scale_factor",
1506                .data           = &watermark_scale_factor,
1507                .maxlen         = sizeof(watermark_scale_factor),
1508                .mode           = 0644,
1509                .proc_handler   = watermark_scale_factor_sysctl_handler,
1510                .extra1         = &one,
1511                .extra2         = &one_thousand,
1512        },
1513        {
1514                .procname       = "percpu_pagelist_fraction",
1515                .data           = &percpu_pagelist_fraction,
1516                .maxlen         = sizeof(percpu_pagelist_fraction),
1517                .mode           = 0644,
1518                .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1519                .extra1         = &zero,
1520        },
1521#ifdef CONFIG_MMU
1522        {
1523                .procname       = "max_map_count",
1524                .data           = &sysctl_max_map_count,
1525                .maxlen         = sizeof(sysctl_max_map_count),
1526                .mode           = 0644,
1527                .proc_handler   = proc_dointvec_minmax,
1528                .extra1         = &zero,
1529        },
1530#else
1531        {
1532                .procname       = "nr_trim_pages",
1533                .data           = &sysctl_nr_trim_pages,
1534                .maxlen         = sizeof(sysctl_nr_trim_pages),
1535                .mode           = 0644,
1536                .proc_handler   = proc_dointvec_minmax,
1537                .extra1         = &zero,
1538        },
1539#endif
1540        {
1541                .procname       = "laptop_mode",
1542                .data           = &laptop_mode,
1543                .maxlen         = sizeof(laptop_mode),
1544                .mode           = 0644,
1545                .proc_handler   = proc_dointvec_jiffies,
1546        },
1547        {
1548                .procname       = "block_dump",
1549                .data           = &block_dump,
1550                .maxlen         = sizeof(block_dump),
1551                .mode           = 0644,
1552                .proc_handler   = proc_dointvec,
1553                .extra1         = &zero,
1554        },
1555        {
1556                .procname       = "vfs_cache_pressure",
1557                .data           = &sysctl_vfs_cache_pressure,
1558                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1559                .mode           = 0644,
1560                .proc_handler   = proc_dointvec,
1561                .extra1         = &zero,
1562        },
1563#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1564        {
1565                .procname       = "legacy_va_layout",
1566                .data           = &sysctl_legacy_va_layout,
1567                .maxlen         = sizeof(sysctl_legacy_va_layout),
1568                .mode           = 0644,
1569                .proc_handler   = proc_dointvec,
1570                .extra1         = &zero,
1571        },
1572#endif
1573#ifdef CONFIG_NUMA
1574        {
1575                .procname       = "zone_reclaim_mode",
1576                .data           = &node_reclaim_mode,
1577                .maxlen         = sizeof(node_reclaim_mode),
1578                .mode           = 0644,
1579                .proc_handler   = proc_dointvec,
1580                .extra1         = &zero,
1581        },
1582        {
1583                .procname       = "min_unmapped_ratio",
1584                .data           = &sysctl_min_unmapped_ratio,
1585                .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1586                .mode           = 0644,
1587                .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1588                .extra1         = &zero,
1589                .extra2         = &one_hundred,
1590        },
1591        {
1592                .procname       = "min_slab_ratio",
1593                .data           = &sysctl_min_slab_ratio,
1594                .maxlen         = sizeof(sysctl_min_slab_ratio),
1595                .mode           = 0644,
1596                .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1597                .extra1         = &zero,
1598                .extra2         = &one_hundred,
1599        },
1600#endif
1601#ifdef CONFIG_SMP
1602        {
1603                .procname       = "stat_interval",
1604                .data           = &sysctl_stat_interval,
1605                .maxlen         = sizeof(sysctl_stat_interval),
1606                .mode           = 0644,
1607                .proc_handler   = proc_dointvec_jiffies,
1608        },
1609        {
1610                .procname       = "stat_refresh",
1611                .data           = NULL,
1612                .maxlen         = 0,
1613                .mode           = 0600,
1614                .proc_handler   = vmstat_refresh,
1615        },
1616#endif
1617#ifdef CONFIG_MMU
1618        {
1619                .procname       = "mmap_min_addr",
1620                .data           = &dac_mmap_min_addr,
1621                .maxlen         = sizeof(unsigned long),
1622                .mode           = 0644,
1623                .proc_handler   = mmap_min_addr_handler,
1624        },
1625#endif
1626#ifdef CONFIG_NUMA
1627        {
1628                .procname       = "numa_zonelist_order",
1629                .data           = &numa_zonelist_order,
1630                .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1631                .mode           = 0644,
1632                .proc_handler   = numa_zonelist_order_handler,
1633        },
1634#endif
1635#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1636   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1637        {
1638                .procname       = "vdso_enabled",
1639#ifdef CONFIG_X86_32
1640                .data           = &vdso32_enabled,
1641                .maxlen         = sizeof(vdso32_enabled),
1642#else
1643                .data           = &vdso_enabled,
1644                .maxlen         = sizeof(vdso_enabled),
1645#endif
1646                .mode           = 0644,
1647                .proc_handler   = proc_dointvec,
1648                .extra1         = &zero,
1649        },
1650#endif
1651#ifdef CONFIG_HIGHMEM
1652        {
1653                .procname       = "highmem_is_dirtyable",
1654                .data           = &vm_highmem_is_dirtyable,
1655                .maxlen         = sizeof(vm_highmem_is_dirtyable),
1656                .mode           = 0644,
1657                .proc_handler   = proc_dointvec_minmax,
1658                .extra1         = &zero,
1659                .extra2         = &one,
1660        },
1661#endif
1662#ifdef CONFIG_MEMORY_FAILURE
1663        {
1664                .procname       = "memory_failure_early_kill",
1665                .data           = &sysctl_memory_failure_early_kill,
1666                .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1667                .mode           = 0644,
1668                .proc_handler   = proc_dointvec_minmax,
1669                .extra1         = &zero,
1670                .extra2         = &one,
1671        },
1672        {
1673                .procname       = "memory_failure_recovery",
1674                .data           = &sysctl_memory_failure_recovery,
1675                .maxlen         = sizeof(sysctl_memory_failure_recovery),
1676                .mode           = 0644,
1677                .proc_handler   = proc_dointvec_minmax,
1678                .extra1         = &zero,
1679                .extra2         = &one,
1680        },
1681#endif
1682        {
1683                .procname       = "user_reserve_kbytes",
1684                .data           = &sysctl_user_reserve_kbytes,
1685                .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1686                .mode           = 0644,
1687                .proc_handler   = proc_doulongvec_minmax,
1688        },
1689        {
1690                .procname       = "admin_reserve_kbytes",
1691                .data           = &sysctl_admin_reserve_kbytes,
1692                .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1693                .mode           = 0644,
1694                .proc_handler   = proc_doulongvec_minmax,
1695        },
1696#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1697        {
1698                .procname       = "mmap_rnd_bits",
1699                .data           = &mmap_rnd_bits,
1700                .maxlen         = sizeof(mmap_rnd_bits),
1701                .mode           = 0600,
1702                .proc_handler   = proc_dointvec_minmax,
1703                .extra1         = (void *)&mmap_rnd_bits_min,
1704                .extra2         = (void *)&mmap_rnd_bits_max,
1705        },
1706#endif
1707#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1708        {
1709                .procname       = "mmap_rnd_compat_bits",
1710                .data           = &mmap_rnd_compat_bits,
1711                .maxlen         = sizeof(mmap_rnd_compat_bits),
1712                .mode           = 0600,
1713                .proc_handler   = proc_dointvec_minmax,
1714                .extra1         = (void *)&mmap_rnd_compat_bits_min,
1715                .extra2         = (void *)&mmap_rnd_compat_bits_max,
1716        },
1717#endif
1718#ifdef CONFIG_USERFAULTFD
1719        {
1720                .procname       = "unprivileged_userfaultfd",
1721                .data           = &sysctl_unprivileged_userfaultfd,
1722                .maxlen         = sizeof(sysctl_unprivileged_userfaultfd),
1723                .mode           = 0644,
1724                .proc_handler   = proc_dointvec_minmax,
1725                .extra1         = &zero,
1726                .extra2         = &one,
1727        },
1728#endif
1729        { }
1730};
1731
1732static struct ctl_table fs_table[] = {
1733        {
1734                .procname       = "inode-nr",
1735                .data           = &inodes_stat,
1736                .maxlen         = 2*sizeof(long),
1737                .mode           = 0444,
1738                .proc_handler   = proc_nr_inodes,
1739        },
1740        {
1741                .procname       = "inode-state",
1742                .data           = &inodes_stat,
1743                .maxlen         = 7*sizeof(long),
1744                .mode           = 0444,
1745                .proc_handler   = proc_nr_inodes,
1746        },
1747        {
1748                .procname       = "file-nr",
1749                .data           = &files_stat,
1750                .maxlen         = sizeof(files_stat),
1751                .mode           = 0444,
1752                .proc_handler   = proc_nr_files,
1753        },
1754        {
1755                .procname       = "file-max",
1756                .data           = &files_stat.max_files,
1757                .maxlen         = sizeof(files_stat.max_files),
1758                .mode           = 0644,
1759                .proc_handler   = proc_doulongvec_minmax,
1760                .extra1         = &zero_ul,
1761                .extra2         = &long_max,
1762        },
1763        {
1764                .procname       = "nr_open",
1765                .data           = &sysctl_nr_open,
1766                .maxlen         = sizeof(unsigned int),
1767                .mode           = 0644,
1768                .proc_handler   = proc_dointvec_minmax,
1769                .extra1         = &sysctl_nr_open_min,
1770                .extra2         = &sysctl_nr_open_max,
1771        },
1772        {
1773                .procname       = "dentry-state",
1774                .data           = &dentry_stat,
1775                .maxlen         = 6*sizeof(long),
1776                .mode           = 0444,
1777                .proc_handler   = proc_nr_dentry,
1778        },
1779        {
1780                .procname       = "overflowuid",
1781                .data           = &fs_overflowuid,
1782                .maxlen         = sizeof(int),
1783                .mode           = 0644,
1784                .proc_handler   = proc_dointvec_minmax,
1785                .extra1         = &minolduid,
1786                .extra2         = &maxolduid,
1787        },
1788        {
1789                .procname       = "overflowgid",
1790                .data           = &fs_overflowgid,
1791                .maxlen         = sizeof(int),
1792                .mode           = 0644,
1793                .proc_handler   = proc_dointvec_minmax,
1794                .extra1         = &minolduid,
1795                .extra2         = &maxolduid,
1796        },
1797#ifdef CONFIG_FILE_LOCKING
1798        {
1799                .procname       = "leases-enable",
1800                .data           = &leases_enable,
1801                .maxlen         = sizeof(int),
1802                .mode           = 0644,
1803                .proc_handler   = proc_dointvec,
1804        },
1805#endif
1806#ifdef CONFIG_DNOTIFY
1807        {
1808                .procname       = "dir-notify-enable",
1809                .data           = &dir_notify_enable,
1810                .maxlen         = sizeof(int),
1811                .mode           = 0644,
1812                .proc_handler   = proc_dointvec,
1813        },
1814#endif
1815#ifdef CONFIG_MMU
1816#ifdef CONFIG_FILE_LOCKING
1817        {
1818                .procname       = "lease-break-time",
1819                .data           = &lease_break_time,
1820                .maxlen         = sizeof(int),
1821                .mode           = 0644,
1822                .proc_handler   = proc_dointvec,
1823        },
1824#endif
1825#ifdef CONFIG_AIO
1826        {
1827                .procname       = "aio-nr",
1828                .data           = &aio_nr,
1829                .maxlen         = sizeof(aio_nr),
1830                .mode           = 0444,
1831                .proc_handler   = proc_doulongvec_minmax,
1832        },
1833        {
1834                .procname       = "aio-max-nr",
1835                .data           = &aio_max_nr,
1836                .maxlen         = sizeof(aio_max_nr),
1837                .mode           = 0644,
1838                .proc_handler   = proc_doulongvec_minmax,
1839        },
1840#endif /* CONFIG_AIO */
1841#ifdef CONFIG_INOTIFY_USER
1842        {
1843                .procname       = "inotify",
1844                .mode           = 0555,
1845                .child          = inotify_table,
1846        },
1847#endif  
1848#ifdef CONFIG_EPOLL
1849        {
1850                .procname       = "epoll",
1851                .mode           = 0555,
1852                .child          = epoll_table,
1853        },
1854#endif
1855#endif
1856        {
1857                .procname       = "protected_symlinks",
1858                .data           = &sysctl_protected_symlinks,
1859                .maxlen         = sizeof(int),
1860                .mode           = 0600,
1861                .proc_handler   = proc_dointvec_minmax,
1862                .extra1         = &zero,
1863                .extra2         = &one,
1864        },
1865        {
1866                .procname       = "protected_hardlinks",
1867                .data           = &sysctl_protected_hardlinks,
1868                .maxlen         = sizeof(int),
1869                .mode           = 0600,
1870                .proc_handler   = proc_dointvec_minmax,
1871                .extra1         = &zero,
1872                .extra2         = &one,
1873        },
1874        {
1875                .procname       = "protected_fifos",
1876                .data           = &sysctl_protected_fifos,
1877                .maxlen         = sizeof(int),
1878                .mode           = 0600,
1879                .proc_handler   = proc_dointvec_minmax,
1880                .extra1         = &zero,
1881                .extra2         = &two,
1882        },
1883        {
1884                .procname       = "protected_regular",
1885                .data           = &sysctl_protected_regular,
1886                .maxlen         = sizeof(int),
1887                .mode           = 0600,
1888                .proc_handler   = proc_dointvec_minmax,
1889                .extra1         = &zero,
1890                .extra2         = &two,
1891        },
1892        {
1893                .procname       = "suid_dumpable",
1894                .data           = &suid_dumpable,
1895                .maxlen         = sizeof(int),
1896                .mode           = 0644,
1897                .proc_handler   = proc_dointvec_minmax_coredump,
1898                .extra1         = &zero,
1899                .extra2         = &two,
1900        },
1901#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1902        {
1903                .procname       = "binfmt_misc",
1904                .mode           = 0555,
1905                .child          = sysctl_mount_point,
1906        },
1907#endif
1908        {
1909                .procname       = "pipe-max-size",
1910                .data           = &pipe_max_size,
1911                .maxlen         = sizeof(pipe_max_size),
1912                .mode           = 0644,
1913                .proc_handler   = proc_dopipe_max_size,
1914        },
1915        {
1916                .procname       = "pipe-user-pages-hard",
1917                .data           = &pipe_user_pages_hard,
1918                .maxlen         = sizeof(pipe_user_pages_hard),
1919                .mode           = 0644,
1920                .proc_handler   = proc_doulongvec_minmax,
1921        },
1922        {
1923                .procname       = "pipe-user-pages-soft",
1924                .data           = &pipe_user_pages_soft,
1925                .maxlen         = sizeof(pipe_user_pages_soft),
1926                .mode           = 0644,
1927                .proc_handler   = proc_doulongvec_minmax,
1928        },
1929        {
1930                .procname       = "mount-max",
1931                .data           = &sysctl_mount_max,
1932                .maxlen         = sizeof(unsigned int),
1933                .mode           = 0644,
1934                .proc_handler   = proc_dointvec_minmax,
1935                .extra1         = &one,
1936        },
1937        { }
1938};
1939
1940static struct ctl_table debug_table[] = {
1941#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1942        {
1943                .procname       = "exception-trace",
1944                .data           = &show_unhandled_signals,
1945                .maxlen         = sizeof(int),
1946                .mode           = 0644,
1947                .proc_handler   = proc_dointvec
1948        },
1949#endif
1950#if defined(CONFIG_OPTPROBES)
1951        {
1952                .procname       = "kprobes-optimization",
1953                .data           = &sysctl_kprobes_optimization,
1954                .maxlen         = sizeof(int),
1955                .mode           = 0644,
1956                .proc_handler   = proc_kprobes_optimization_handler,
1957                .extra1         = &zero,
1958                .extra2         = &one,
1959        },
1960#endif
1961        { }
1962};
1963
1964static struct ctl_table dev_table[] = {
1965        { }
1966};
1967
1968int __init sysctl_init(void)
1969{
1970        struct ctl_table_header *hdr;
1971
1972        hdr = register_sysctl_table(sysctl_base_table);
1973        kmemleak_not_leak(hdr);
1974        return 0;
1975}
1976
1977#endif /* CONFIG_SYSCTL */
1978
1979/*
1980 * /proc/sys support
1981 */
1982
1983#ifdef CONFIG_PROC_SYSCTL
1984
1985static int _proc_do_string(char *data, int maxlen, int write,
1986                           char __user *buffer,
1987                           size_t *lenp, loff_t *ppos)
1988{
1989        size_t len;
1990        char __user *p;
1991        char c;
1992
1993        if (!data || !maxlen || !*lenp) {
1994                *lenp = 0;
1995                return 0;
1996        }
1997
1998        if (write) {
1999                if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
2000                        /* Only continue writes not past the end of buffer. */
2001                        len = strlen(data);
2002                        if (len > maxlen - 1)
2003                                len = maxlen - 1;
2004
2005                        if (*ppos > len)
2006                                return 0;
2007                        len = *ppos;
2008                } else {
2009                        /* Start writing from beginning of buffer. */
2010                        len = 0;
2011                }
2012
2013                *ppos += *lenp;
2014                p = buffer;
2015                while ((p - buffer) < *lenp && len < maxlen - 1) {
2016                        if (get_user(c, p++))
2017                                return -EFAULT;
2018                        if (c == 0 || c == '\n')
2019                                break;
2020                        data[len++] = c;
2021                }
2022                data[len] = 0;
2023        } else {
2024                len = strlen(data);
2025                if (len > maxlen)
2026                        len = maxlen;
2027
2028                if (*ppos > len) {
2029                        *lenp = 0;
2030                        return 0;
2031                }
2032
2033                data += *ppos;
2034                len  -= *ppos;
2035
2036                if (len > *lenp)
2037                        len = *lenp;
2038                if (len)
2039                        if (copy_to_user(buffer, data, len))
2040                                return -EFAULT;
2041                if (len < *lenp) {
2042                        if (put_user('\n', buffer + len))
2043                                return -EFAULT;
2044                        len++;
2045                }
2046                *lenp = len;
2047                *ppos += len;
2048        }
2049        return 0;
2050}
2051
2052static void warn_sysctl_write(struct ctl_table *table)
2053{
2054        pr_warn_once("%s wrote to %s when file position was not 0!\n"
2055                "This will not be supported in the future. To silence this\n"
2056                "warning, set kernel.sysctl_writes_strict = -1\n",
2057                current->comm, table->procname);
2058}
2059
2060/**
2061 * proc_first_pos_non_zero_ignore - check if first position is allowed
2062 * @ppos: file position
2063 * @table: the sysctl table
2064 *
2065 * Returns true if the first position is non-zero and the sysctl_writes_strict
2066 * mode indicates this is not allowed for numeric input types. String proc
2067 * handlers can ignore the return value.
2068 */
2069static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2070                                           struct ctl_table *table)
2071{
2072        if (!*ppos)
2073                return false;
2074
2075        switch (sysctl_writes_strict) {
2076        case SYSCTL_WRITES_STRICT:
2077                return true;
2078        case SYSCTL_WRITES_WARN:
2079                warn_sysctl_write(table);
2080                return false;
2081        default:
2082                return false;
2083        }
2084}
2085
2086/**
2087 * proc_dostring - read a string sysctl
2088 * @table: the sysctl table
2089 * @write: %TRUE if this is a write to the sysctl file
2090 * @buffer: the user buffer
2091 * @lenp: the size of the user buffer
2092 * @ppos: file position
2093 *
2094 * Reads/writes a string from/to the user buffer. If the kernel
2095 * buffer provided is not large enough to hold the string, the
2096 * string is truncated. The copied string is %NULL-terminated.
2097 * If the string is being read by the user process, it is copied
2098 * and a newline '\n' is added. It is truncated if the buffer is
2099 * not large enough.
2100 *
2101 * Returns 0 on success.
2102 */
2103int proc_dostring(struct ctl_table *table, int write,
2104                  void __user *buffer, size_t *lenp, loff_t *ppos)
2105{
2106        if (write)
2107                proc_first_pos_non_zero_ignore(ppos, table);
2108
2109        return _proc_do_string((char *)(table->data), table->maxlen, write,
2110                               (char __user *)buffer, lenp, ppos);
2111}
2112
2113static size_t proc_skip_spaces(char **buf)
2114{
2115        size_t ret;
2116        char *tmp = skip_spaces(*buf);
2117        ret = tmp - *buf;
2118        *buf = tmp;
2119        return ret;
2120}
2121
/*
 * proc_skip_char - step over a leading run of the character @v.
 *
 * Advances *@buf and decrements *@size for every leading byte equal to @v,
 * stopping at the first different byte or when *@size reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cursor = *buf;
	size_t remaining = *size;

	while (remaining && *cursor == v) {
		cursor++;
		remaining--;
	}

	*buf = cursor;
	*size = remaining;
}
2131
2132/**
2133 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
2134 *                   fail on overflow
2135 *
2136 * @cp: kernel buffer containing the string to parse
2137 * @endp: pointer to store the trailing characters
2138 * @base: the base to use
2139 * @res: where the parsed integer will be stored
2140 *
2141 * In case of success 0 is returned and @res will contain the parsed integer,
2142 * @endp will hold any trailing characters.
2143 * This function will fail the parse on overflow. If there wasn't an overflow
2144 * the function will defer the decision what characters count as invalid to the
2145 * caller.
2146 */
2147static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
2148                           unsigned long *res)
2149{
2150        unsigned long long result;
2151        unsigned int rv;
2152
2153        cp = _parse_integer_fixup_radix(cp, &base);
2154        rv = _parse_integer(cp, base, &result);
2155        if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
2156                return -ERANGE;
2157
2158        cp += rv;
2159
2160        if (endp)
2161                *endp = (char *)cp;
2162
2163        *res = (unsigned long)result;
2164        return 0;
2165}
2166
2167#define TMPBUFLEN 22
2168/**
2169 * proc_get_long - reads an ASCII formatted integer from a user buffer
2170 *
2171 * @buf: a kernel buffer
2172 * @size: size of the kernel buffer
2173 * @val: this is where the number will be stored
2174 * @neg: set to %TRUE if number is negative
2175 * @perm_tr: a vector which contains the allowed trailers
2176 * @perm_tr_len: size of the perm_tr vector
2177 * @tr: pointer to store the trailer character
2178 *
2179 * In case of success %0 is returned and @buf and @size are updated with
2180 * the amount of bytes read. If @tr is non-NULL and a trailing
2181 * character exists (size is non-zero after returning from this
2182 * function), @tr is updated with the trailing character.
2183 */
2184static int proc_get_long(char **buf, size_t *size,
2185                          unsigned long *val, bool *neg,
2186                          const char *perm_tr, unsigned perm_tr_len, char *tr)
2187{
2188        int len;
2189        char *p, tmp[TMPBUFLEN];
2190
2191        if (!*size)
2192                return -EINVAL;
2193
2194        len = *size;
2195        if (len > TMPBUFLEN - 1)
2196                len = TMPBUFLEN - 1;
2197
2198        memcpy(tmp, *buf, len);
2199
2200        tmp[len] = 0;
2201        p = tmp;
2202        if (*p == '-' && *size > 1) {
2203                *neg = true;
2204                p++;
2205        } else
2206                *neg = false;
2207        if (!isdigit(*p))
2208                return -EINVAL;
2209
2210        if (strtoul_lenient(p, &p, 0, val))
2211                return -EINVAL;
2212
2213        len = p - tmp;
2214
2215        /* We don't know if the next char is whitespace thus we may accept
2216         * invalid integers (e.g. 1234...a) or two integers instead of one
2217         * (e.g. 123...1). So lets not allow such large numbers. */
2218        if (len == TMPBUFLEN - 1)
2219                return -EINVAL;
2220
2221        if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2222                return -EINVAL;
2223
2224        if (tr && (len < *size))
2225                *tr = *p;
2226
2227        *buf += len;
2228        *size -= len;
2229
2230        return 0;
2231}
2232
2233/**
2234 * proc_put_long - converts an integer to a decimal ASCII formatted string
2235 *
2236 * @buf: the user buffer
2237 * @size: the size of the user buffer
2238 * @val: the integer to be converted
2239 * @neg: sign of the number, %TRUE for negative
2240 *
2241 * In case of success %0 is returned and @buf and @size are updated with
2242 * the amount of bytes written.
2243 */
2244static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2245                          bool neg)
2246{
2247        int len;
2248        char tmp[TMPBUFLEN], *p = tmp;
2249
2250        sprintf(p, "%s%lu", neg ? "-" : "", val);
2251        len = strlen(tmp);
2252        if (len > *size)
2253                len = *size;
2254        if (copy_to_user(*buf, tmp, len))
2255                return -EFAULT;
2256        *size -= len;
2257        *buf += len;
2258        return 0;
2259}
2260#undef TMPBUFLEN
2261
2262static int proc_put_char(void __user **buf, size_t *size, char c)
2263{
2264        if (*size) {
2265                char __user **buffer = (char __user **)buf;
2266                if (put_user(c, *buffer))
2267                        return -EFAULT;
2268                (*size)--, (*buffer)++;
2269                *buf = *buffer;
2270        }
2271        return 0;
2272}
2273
/*
 * do_proc_dointvec_conv - convert between an int and its sign/magnitude form
 * @negp: sign flag (input on write, output on read)
 * @lvalp: magnitude (input on write, output on read)
 * @valp: the int being written to or read from
 * @write: non-zero to store into @valp, zero to load from it
 * @data: unused
 *
 * On write the magnitude must fit an int with the requested sign: at most
 * INT_MAX for positive values and INT_MAX + 1 for negative ones; anything
 * larger yields -EINVAL and leaves @valp untouched.  Returns 0 otherwise.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		const int cur = *valp;

		*negp = cur < 0;
		/* Negate in unsigned arithmetic so INT_MIN is handled too. */
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	if (*negp) {
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		*valp = -*lvalp;
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	*valp = *lvalp;
	return 0;
}
2300
/*
 * do_proc_douintvec_conv - convert between an unsigned int and unsigned long
 * @lvalp: the unsigned long side (input on write, output on read)
 * @valp: the unsigned int being written to or read from
 * @write: non-zero to store into @valp, zero to load from it
 * @data: unused
 *
 * A write of a value above UINT_MAX is rejected with -EINVAL and leaves
 * @valp untouched.  Returns 0 otherwise.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2315
2316static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2317
2318static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2319                  int write, void __user *buffer,
2320                  size_t *lenp, loff_t *ppos,
2321                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2322                              int write, void *data),
2323                  void *data)
2324{
2325        int *i, vleft, first = 1, err = 0;
2326        size_t left;
2327        char *kbuf = NULL, *p;
2328        
2329        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2330                *lenp = 0;
2331                return 0;
2332        }
2333        
2334        i = (int *) tbl_data;
2335        vleft = table->maxlen / sizeof(*i);
2336        left = *lenp;
2337
2338        if (!conv)
2339                conv = do_proc_dointvec_conv;
2340
2341        if (write) {
2342                if (proc_first_pos_non_zero_ignore(ppos, table))
2343                        goto out;
2344
2345                if (left > PAGE_SIZE - 1)
2346                        left = PAGE_SIZE - 1;
2347                p = kbuf = memdup_user_nul(buffer, left);
2348                if (IS_ERR(kbuf))
2349                        return PTR_ERR(kbuf);
2350        }
2351
2352        for (; left && vleft--; i++, first=0) {
2353                unsigned long lval;
2354                bool neg;
2355
2356                if (write) {
2357                        left -= proc_skip_spaces(&p);
2358
2359                        if (!left)
2360                                break;
2361                        err = proc_get_long(&p, &left, &lval, &neg,
2362                                             proc_wspace_sep,
2363                                             sizeof(proc_wspace_sep), NULL);
2364                        if (err)
2365                                break;
2366                        if (conv(&neg, &lval, i, 1, data)) {
2367                                err = -EINVAL;
2368                                break;
2369                        }
2370                } else {
2371                        if (conv(&neg, &lval, i, 0, data)) {
2372                                err = -EINVAL;
2373                                break;
2374                        }
2375                        if (!first)
2376                                err = proc_put_char(&buffer, &left, '\t');
2377                        if (err)
2378                                break;
2379                        err = proc_put_long(&buffer, &left, lval, neg);
2380                        if (err)
2381                                break;
2382                }
2383        }
2384
2385        if (!write && !first && left && !err)
2386                err = proc_put_char(&buffer, &left, '\n');
2387        if (write && !err && left)
2388                left -= proc_skip_spaces(&p);
2389        if (write) {
2390                kfree(kbuf);
2391                if (first)
2392                        return err ? : -EINVAL;
2393        }
2394        *lenp -= left;
2395out:
2396        *ppos += *lenp;
2397        return err;
2398}
2399
2400static int do_proc_dointvec(struct ctl_table *table, int write,
2401                  void __user *buffer, size_t *lenp, loff_t *ppos,
2402                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2403                              int write, void *data),
2404                  void *data)
2405{
2406        return __do_proc_dointvec(table->data, table, write,
2407                        buffer, lenp, ppos, conv, data);
2408}
2409
2410static int do_proc_douintvec_w(unsigned int *tbl_data,
2411                               struct ctl_table *table,
2412                               void __user *buffer,
2413                               size_t *lenp, loff_t *ppos,
2414                               int (*conv)(unsigned long *lvalp,
2415                                           unsigned int *valp,
2416                                           int write, void *data),
2417                               void *data)
2418{
2419        unsigned long lval;
2420        int err = 0;
2421        size_t left;
2422        bool neg;
2423        char *kbuf = NULL, *p;
2424
2425        left = *lenp;
2426
2427        if (proc_first_pos_non_zero_ignore(ppos, table))
2428                goto bail_early;
2429
2430        if (left > PAGE_SIZE - 1)
2431                left = PAGE_SIZE - 1;
2432
2433        p = kbuf = memdup_user_nul(buffer, left);
2434        if (IS_ERR(kbuf))
2435                return -EINVAL;
2436
2437        left -= proc_skip_spaces(&p);
2438        if (!left) {
2439                err = -EINVAL;
2440                goto out_free;
2441        }
2442
2443        err = proc_get_long(&p, &left, &lval, &neg,
2444                             proc_wspace_sep,
2445                             sizeof(proc_wspace_sep), NULL);
2446        if (err || neg) {
2447                err = -EINVAL;
2448                goto out_free;
2449        }
2450
2451        if (conv(&lval, tbl_data, 1, data)) {
2452                err = -EINVAL;
2453                goto out_free;
2454        }
2455
2456        if (!err && left)
2457                left -= proc_skip_spaces(&p);
2458
2459out_free:
2460        kfree(kbuf);
2461        if (err)
2462                return -EINVAL;
2463
2464        return 0;
2465
2466        /* This is in keeping with old __do_proc_dointvec() */
2467bail_early:
2468        *ppos += *lenp;
2469        return err;
2470}
2471
2472static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2473                               size_t *lenp, loff_t *ppos,
2474                               int (*conv)(unsigned long *lvalp,
2475                                           unsigned int *valp,
2476                                           int write, void *data),
2477                               void *data)
2478{
2479        unsigned long lval;
2480        int err = 0;
2481        size_t left;
2482
2483        left = *lenp;
2484
2485        if (conv(&lval, tbl_data, 0, data)) {
2486                err = -EINVAL;
2487                goto out;
2488        }
2489
2490        err = proc_put_long(&buffer, &left, lval, false);
2491        if (err || !left)
2492                goto out;
2493
2494        err = proc_put_char(&buffer, &left, '\n');
2495
2496out:
2497        *lenp -= left;
2498        *ppos += *lenp;
2499
2500        return err;
2501}
2502
2503static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2504                               int write, void __user *buffer,
2505                               size_t *lenp, loff_t *ppos,
2506                               int (*conv)(unsigned long *lvalp,
2507                                           unsigned int *valp,
2508                                           int write, void *data),
2509                               void *data)
2510{
2511        unsigned int *i, vleft;
2512
2513        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2514                *lenp = 0;
2515                return 0;
2516        }
2517
2518        i = (unsigned int *) tbl_data;
2519        vleft = table->maxlen / sizeof(*i);
2520
2521        /*
2522         * Arrays are not supported, keep this simple. *Do not* add
2523         * support for them.
2524         */
2525        if (vleft != 1) {
2526                *lenp = 0;
2527                return -EINVAL;
2528        }
2529
2530        if (!conv)
2531                conv = do_proc_douintvec_conv;
2532
2533        if (write)
2534                return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2535                                           conv, data);
2536        return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2537}
2538
2539static int do_proc_douintvec(struct ctl_table *table, int write,
2540                             void __user *buffer, size_t *lenp, loff_t *ppos,
2541                             int (*conv)(unsigned long *lvalp,
2542                                         unsigned int *valp,
2543                                         int write, void *data),
2544                             void *data)
2545{
2546        return __do_proc_douintvec(table->data, table, write,
2547                                   buffer, lenp, ppos, conv, data);
2548}
2549
2550/**
2551 * proc_dointvec - read a vector of integers
2552 * @table: the sysctl table
2553 * @write: %TRUE if this is a write to the sysctl file
2554 * @buffer: the user buffer
2555 * @lenp: the size of the user buffer
2556 * @ppos: file position
2557 *
2558 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2559 * values from/to the user buffer, treated as an ASCII string. 
2560 *
2561 * Returns 0 on success.
2562 */
2563int proc_dointvec(struct ctl_table *table, int write,
2564                     void __user *buffer, size_t *lenp, loff_t *ppos)
2565{
2566        return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2567}
2568
2569/**
2570 * proc_douintvec - read a vector of unsigned integers
2571 * @table: the sysctl table
2572 * @write: %TRUE if this is a write to the sysctl file
2573 * @buffer: the user buffer
2574 * @lenp: the size of the user buffer
2575 * @ppos: file position
2576 *
2577 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2578 * values from/to the user buffer, treated as an ASCII string.
2579 *
2580 * Returns 0 on success.
2581 */
2582int proc_douintvec(struct ctl_table *table, int write,
2583                     void __user *buffer, size_t *lenp, loff_t *ppos)
2584{
2585        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2586                                 do_proc_douintvec_conv, NULL);
2587}
2588
2589/*
2590 * Taint values can only be increased
2591 * This means we can safely use a temporary.
2592 */
2593static int proc_taint(struct ctl_table *table, int write,
2594                               void __user *buffer, size_t *lenp, loff_t *ppos)
2595{
2596        struct ctl_table t;
2597        unsigned long tmptaint = get_taint();
2598        int err;
2599
2600        if (write && !capable(CAP_SYS_ADMIN))
2601                return -EPERM;
2602
2603        t = *table;
2604        t.data = &tmptaint;
2605        err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2606        if (err < 0)
2607                return err;
2608
2609        if (write) {
2610                /*
2611                 * Poor man's atomic or. Not worth adding a primitive
2612                 * to everyone's atomic.h for this
2613                 */
2614                int i;
2615                for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2616                        if ((tmptaint >> i) & 1)
2617                                add_taint(i, LOCKDEP_STILL_OK);
2618                }
2619        }
2620
2621        return err;
2622}
2623
2624#ifdef CONFIG_PRINTK
2625static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2626                                void __user *buffer, size_t *lenp, loff_t *ppos)
2627{
2628        if (write && !capable(CAP_SYS_ADMIN))
2629                return -EPERM;
2630
2631        return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2632}
2633#endif
2634
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the lower and upper bounds that the proc_dointvec_minmax()
 * handler applies when range checking a sysctl parameter.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};
2648
2649static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2650                                        int *valp,
2651                                        int write, void *data)
2652{
2653        int tmp, ret;
2654        struct do_proc_dointvec_minmax_conv_param *param = data;
2655        /*
2656         * If writing, first do so via a temporary local int so we can
2657         * bounds-check it before touching *valp.
2658         */
2659        int *ip = write ? &tmp : valp;
2660
2661        ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
2662        if (ret)
2663                return ret;
2664
2665        if (write) {
2666                if ((param->min && *param->min > tmp) ||
2667                    (param->max && *param->max < tmp))
2668                        return -EINVAL;
2669                *valp = tmp;
2670        }
2671
2672        return 0;
2673}
2674
2675/**
2676 * proc_dointvec_minmax - read a vector of integers with min/max values
2677 * @table: the sysctl table
2678 * @write: %TRUE if this is a write to the sysctl file
2679 * @buffer: the user buffer
2680 * @lenp: the size of the user buffer
2681 * @ppos: file position
2682 *
2683 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2684 * values from/to the user buffer, treated as an ASCII string.
2685 *
2686 * This routine will ensure the values are within the range specified by
2687 * table->extra1 (min) and table->extra2 (max).
2688 *
2689 * Returns 0 on success or -EINVAL on write when the range check fails.
2690 */
2691int proc_dointvec_minmax(struct ctl_table *table, int write,
2692                  void __user *buffer, size_t *lenp, loff_t *ppos)
2693{
2694        struct do_proc_dointvec_minmax_conv_param param = {
2695                .min = (int *) table->extra1,
2696                .max = (int *) table->extra2,
2697        };
2698        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2699                                do_proc_dointvec_minmax_conv, &param);
2700}
2701
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the lower and upper bounds that the proc_douintvec_minmax()
 * handler applies when range checking a sysctl parameter.
 */
struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min;
        unsigned int *max;
};
2715
2716static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
2717                                         unsigned int *valp,
2718                                         int write, void *data)
2719{
2720        int ret;
2721        unsigned int tmp;
2722        struct do_proc_douintvec_minmax_conv_param *param = data;
2723        /* write via temporary local uint for bounds-checking */
2724        unsigned int *up = write ? &tmp : valp;
2725
2726        ret = do_proc_douintvec_conv(lvalp, up, write, data);
2727        if (ret)
2728                return ret;
2729
2730        if (write) {
2731                if ((param->min && *param->min > tmp) ||
2732                    (param->max && *param->max < tmp))
2733                        return -ERANGE;
2734
2735                *valp = tmp;
2736        }
2737
2738        return 0;
2739}
2740
2741/**
2742 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2743 * @table: the sysctl table
2744 * @write: %TRUE if this is a write to the sysctl file
2745 * @buffer: the user buffer
2746 * @lenp: the size of the user buffer
2747 * @ppos: file position
2748 *
2749 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2750 * values from/to the user buffer, treated as an ASCII string. Negative
2751 * strings are not allowed.
2752 *
2753 * This routine will ensure the values are within the range specified by
2754 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2755 * check for UINT_MAX to avoid having to support wrap around uses from
2756 * userspace.
2757 *
2758 * Returns 0 on success or -ERANGE on write when the range check fails.
2759 */
2760int proc_douintvec_minmax(struct ctl_table *table, int write,
2761                          void __user *buffer, size_t *lenp, loff_t *ppos)
2762{
2763        struct do_proc_douintvec_minmax_conv_param param = {
2764                .min = (unsigned int *) table->extra1,
2765                .max = (unsigned int *) table->extra2,
2766        };
2767        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2768                                 do_proc_douintvec_minmax_conv, &param);
2769}
2770
2771static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2772                                        unsigned int *valp,
2773                                        int write, void *data)
2774{
2775        if (write) {
2776                unsigned int val;
2777
2778                val = round_pipe_size(*lvalp);
2779                if (val == 0)
2780                        return -EINVAL;
2781
2782                *valp = val;
2783        } else {
2784                unsigned int val = *valp;
2785                *lvalp = (unsigned long) val;
2786        }
2787
2788        return 0;
2789}
2790
2791static int proc_dopipe_max_size(struct ctl_table *table, int write,
2792                                void __user *buffer, size_t *lenp, loff_t *ppos)
2793{
2794        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2795                                 do_proc_dopipe_max_size_conv, NULL);
2796}
2797
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern starts
 * with neither '/' nor '|'.  Compiles to nothing without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
        );
#endif
}
2811
2812static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2813                void __user *buffer, size_t *lenp, loff_t *ppos)
2814{
2815        int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2816        if (!error)
2817                validate_coredump_safety();
2818        return error;
2819}
2820
2821#ifdef CONFIG_COREDUMP
2822static int proc_dostring_coredump(struct ctl_table *table, int write,
2823                  void __user *buffer, size_t *lenp, loff_t *ppos)
2824{
2825        int error = proc_dostring(table, write, buffer, lenp, ppos);
2826        if (!error)
2827                validate_coredump_safety();
2828        return error;
2829}
2830#endif
2831
/*
 * Common worker for the unsigned long vector handlers.
 *
 * @data provides table->maxlen / sizeof(unsigned long) slots.  On write,
 * each parsed value is scaled as convmul * val / convdiv, checked against
 * table->extra1 (min) and table->extra2 (max) when those are non-NULL, and
 * stored.  On read, each slot is scaled as convdiv * (*i) / convmul and
 * written out tab-separated, followed by a newline when room remains.
 *
 * Returns 0 on success, -EINVAL when a written value is out of range or
 * when a write got through no slot at all, and otherwise the error of the
 * helper that failed.
 */
2832static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2833                                     void __user *buffer,
2834                                     size_t *lenp, loff_t *ppos,
2835                                     unsigned long convmul,
2836                                     unsigned long convdiv)
2837{
2838        unsigned long *i, *min, *max;
2839        int vleft, first = 1, err = 0;
2840        size_t left;
2841        char *kbuf = NULL, *p;
2842
        /* Nothing to transfer: report zero bytes handled. */
2843        if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2844                *lenp = 0;
2845                return 0;
2846        }
2847
2848        i = (unsigned long *) data;
2849        min = (unsigned long *) table->extra1;
2850        max = (unsigned long *) table->extra2;
2851        vleft = table->maxlen / sizeof(unsigned long);
2852        left = *lenp;
2853
2854        if (write) {
2855                if (proc_first_pos_non_zero_ignore(ppos, table))
2856                        goto out;
2857
                /* Only the first PAGE_SIZE - 1 bytes of a write are parsed. */
2858                if (left > PAGE_SIZE - 1)
2859                        left = PAGE_SIZE - 1;
2860                p = kbuf = memdup_user_nul(buffer, left);
2861                if (IS_ERR(kbuf))
2862                        return PTR_ERR(kbuf);
2863        }
2864
        /* One slot per iteration; "first" is cleared once a slot has been visited. */
2865        for (; left && vleft--; i++, first = 0) {
2866                unsigned long val;
2867
2868                if (write) {
2869                        bool neg;
2870
2871                        left -= proc_skip_spaces(&p);
2872                        if (!left)
2873                                break;
2874
2875                        err = proc_get_long(&p, &left, &val, &neg,
2876                                             proc_wspace_sep,
2877                                             sizeof(proc_wspace_sep), NULL);
2878                        if (err)
2879                                break;
                        /*
                         * Negative input is not stored; the loop increment
                         * still moves on to the next slot.
                         */
2880                        if (neg)
2881                                continue;
2882                        val = convmul * val / convdiv;
2883                        if ((min && val < *min) || (max && val > *max)) {
2884                                err = -EINVAL;
2885                                break;
2886                        }
2887                        *i = val;
2888                } else {
2889                        val = convdiv * (*i) / convmul;
                        /* Values after the first are preceded by a tab. */
2890                        if (!first) {
2891                                err = proc_put_char(&buffer, &left, '\t');
2892                                if (err)
2893                                        break;
2894                        }
2895                        err = proc_put_long(&buffer, &left, val, false);
2896                        if (err)
2897                                break;
2898                }
2899        }
2900
2901        if (!write && !first && left && !err)
2902                err = proc_put_char(&buffer, &left, '\n');
2903        if (write && !err)
2904                left -= proc_skip_spaces(&p);
2905        if (write) {
2906                kfree(kbuf);
                /* No slot was visited: fail without updating *lenp or *ppos. */
2907                if (first)
2908                        return err ? : -EINVAL;
2909        }
        /* *lenp becomes the number of bytes actually consumed or produced. */
2910        *lenp -= left;
2911out:
2912        *ppos += *lenp;
2913        return err;
2914}
2915
2916static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2917                                     void __user *buffer,
2918                                     size_t *lenp, loff_t *ppos,
2919                                     unsigned long convmul,
2920                                     unsigned long convdiv)
2921{
2922        return __do_proc_doulongvec_minmax(table->data, table, write,
2923                        buffer, lenp, ppos, convmul, convdiv);
2924}
2925
2926/**
2927 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2928 * @table: the sysctl table
2929 * @write: %TRUE if this is a write to the sysctl file
2930 * @buffer: the user buffer
2931 * @lenp: the size of the user buffer
2932 * @ppos: file position
2933 *
2934 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2935 * values from/to the user buffer, treated as an ASCII string.
2936 *
2937 * This routine will ensure the values are within the range specified by
2938 * table->extra1 (min) and table->extra2 (max).
2939 *
2940 * Returns 0 on success.
2941 */
2942int proc_doulongvec_minmax(struct ctl_table *table, int write,
2943                           void __user *buffer, size_t *lenp, loff_t *ppos)
2944{
2945    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2946}
2947
2948/**
2949 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2950 * @table: the sysctl table
2951 * @write: %TRUE if this is a write to the sysctl file
2952 * @buffer: the user buffer
2953 * @lenp: the size of the user buffer
2954 * @ppos: file position
2955 *
2956 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2957 * values from/to the user buffer, treated as an ASCII string. The values
2958 * are treated as milliseconds, and converted to jiffies when they are stored.
2959 *
2960 * This routine will ensure the values are within the range specified by
2961 * table->extra1 (min) and table->extra2 (max).
2962 *
2963 * Returns 0 on success.
2964 */
2965int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2966                                      void __user *buffer,
2967                                      size_t *lenp, loff_t *ppos)
2968{
2969    return do_proc_doulongvec_minmax(table, write, buffer,
2970                                     lenp, ppos, HZ, 1000l);
2971}
2972
2973
2974static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2975                                         int *valp,
2976                                         int write, void *data)
2977{
2978        if (write) {
2979                if (*lvalp > INT_MAX / HZ)
2980                        return 1;
2981                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2982        } else {
2983                int val = *valp;
2984                unsigned long lval;
2985                if (val < 0) {
2986                        *negp = true;
2987                        lval = -(unsigned long)val;
2988                } else {
2989                        *negp = false;
2990                        lval = (unsigned long)val;
2991                }
2992                *lvalp = lval / HZ;
2993        }
2994        return 0;
2995}
2996
2997static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2998                                                int *valp,
2999                                                int write, void *data)
3000{
3001        if (write) {
3002                if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
3003                        return 1;
3004                *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
3005        } else {
3006                int val = *valp;
3007                unsigned long lval;
3008                if (val < 0) {
3009                        *negp = true;
3010                        lval = -(unsigned long)val;
3011                } else {
3012                        *negp = false;
3013                        lval = (unsigned long)val;
3014                }
3015                *lvalp = jiffies_to_clock_t(lval);
3016        }
3017        return 0;
3018}
3019
/*
 * Conversion callback between milliseconds (user side) and jiffies
 * (stored).
 *
 * On write, the signed input goes through msecs_to_jiffies(); a result
 * above INT_MAX is refused by returning 1.  On read, the stored value is
 * split into sign and magnitude and the magnitude goes through
 * jiffies_to_msecs().
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                unsigned long msecs = *negp ? -*lvalp : *lvalp;
                unsigned long jif = msecs_to_jiffies(msecs);

                if (jif > INT_MAX)
                        return 1;

                *valp = (int)jif;
        } else {
                int stored = *valp;
                unsigned long magnitude;

                *negp = stored < 0;
                magnitude = *negp ? -(unsigned long)stored
                                  : (unsigned long)stored;
                *lvalp = jiffies_to_msecs(magnitude);
        }

        return 0;
}
3044
3045/**
3046 * proc_dointvec_jiffies - read a vector of integers as seconds
3047 * @table: the sysctl table
3048 * @write: %TRUE if this is a write to the sysctl file
3049 * @buffer: the user buffer
3050 * @lenp: the size of the user buffer
3051 * @ppos: file position
3052 *
3053 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3054 * values from/to the user buffer, treated as an ASCII string. 
3055 * The values read are assumed to be in seconds, and are converted into
3056 * jiffies.
3057 *
3058 * Returns 0 on success.
3059 */
3060int proc_dointvec_jiffies(struct ctl_table *table, int write,
3061                          void __user *buffer, size_t *lenp, loff_t *ppos)
3062{
3063    return do_proc_dointvec(table,write,buffer,lenp,ppos,
3064                            do_proc_dointvec_jiffies_conv,NULL);
3065}
3066
3067/**
3068 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3069 * @table: the sysctl table
3070 * @write: %TRUE if this is a write to the sysctl file
3071 * @buffer: the user buffer
3072 * @lenp: the size of the user buffer
3073 * @ppos: pointer to the file position
3074 *
3075 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3076 * values from/to the user buffer, treated as an ASCII string. 
3077 * The values read are assumed to be in 1/USER_HZ seconds, and 
3078 * are converted into jiffies.
3079 *
3080 * Returns 0 on success.
3081 */
3082int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3083                                 void __user *buffer, size_t *lenp, loff_t *ppos)
3084{
3085    return do_proc_dointvec(table,write,buffer,lenp,ppos,
3086                            do_proc_dointvec_userhz_jiffies_conv,NULL);
3087}
3088
3089/**
3090 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3091 * @table: the sysctl table
3092 * @write: %TRUE if this is a write to the sysctl file
3093 * @buffer: the user buffer
3094 * @lenp: the size of the user buffer
3095 * @ppos: file position
3096 * @ppos: the current position in the file
3097 *
3098 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3099 * values from/to the user buffer, treated as an ASCII string. 
3100 * The values read are assumed to be in 1/1000 seconds, and 
3101 * are converted into jiffies.
3102 *
3103 * Returns 0 on success.
3104 */
3105int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3106                             void __user *buffer, size_t *lenp, loff_t *ppos)
3107{
3108        return do_proc_dointvec(table, write, buffer, lenp, ppos,
3109                                do_proc_dointvec_ms_jiffies_conv, NULL);
3110}
3111
3112static int proc_do_cad_pid(struct ctl_table *table, int write,
3113                           void __user *buffer, size_t *lenp, loff_t *ppos)
3114{
3115        struct pid *new_pid;
3116        pid_t tmp;
3117        int r;
3118
3119        tmp = pid_vnr(cad_pid);
3120
3121        r = __do_proc_dointvec(&tmp, table, write, buffer,
3122                               lenp, ppos, NULL, NULL);
3123        if (r || !write)
3124                return r;
3125
3126        new_pid = find_get_pid(tmp);
3127        if (!new_pid)
3128                return -ESRCH;
3129
3130        put_pid(xchg(&cad_pid, new_pid));
3131        return 0;
3132}
3133
3134/**
3135 * proc_do_large_bitmap - read/write from/to a large bitmap
3136 * @table: the sysctl table
3137 * @write: %TRUE if this is a write to the sysctl file
3138 * @buffer: the user buffer
3139 * @lenp: the size of the user buffer
3140 * @ppos: file position
3141 *
3142 * The bitmap is stored at table->data and the bitmap length (in bits)
3143 * in table->maxlen.
3144 *
3145 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3146 * large bitmaps may be represented in a compact manner. Writing into
3147 * the file will clear the bitmap then update it with the given input.
3148 *
3149 * Returns 0 on success.
3150 */
3151int proc_do_large_bitmap(struct ctl_table *table, int write,
3152                         void __user *buffer, size_t *lenp, loff_t *ppos)
3153{
3154        int err = 0;
3155        bool first = 1;
3156        size_t left = *lenp;
3157        unsigned long bitmap_len = table->maxlen;
3158        unsigned long *bitmap = *(unsigned long **) table->data;
3159        unsigned long *tmp_bitmap = NULL;
        /*
         * Character sets handed to proc_get_long(): tr_a when parsing the
         * start of a range, tr_b when parsing the end of a range.
         */
3160        char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3161
        /* Nothing to transfer: report zero bytes handled. */
3162        if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3163                *lenp = 0;
3164                return 0;
3165        }
3166
3167        if (write) {
3168                char *kbuf, *p;
3169                size_t skipped = 0;
3170
                /* Only the first PAGE_SIZE - 1 bytes are parsed in one call. */
3171                if (left > PAGE_SIZE - 1) {
3172                        left = PAGE_SIZE - 1;
3173                        /* How much of the buffer we'll skip this pass */
3174                        skipped = *lenp - left;
3175                }
3176
3177                p = kbuf = memdup_user_nul(buffer, left);
3178                if (IS_ERR(kbuf))
3179                        return PTR_ERR(kbuf);
3180
                /*
                 * Parse into a scratch bitmap so that the live bitmap is
                 * only modified once the whole input has been accepted.
                 */
3181                tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
3182                if (!tmp_bitmap) {
3183                        kfree(kbuf);
3184                        return -ENOMEM;
3185                }
3186                proc_skip_char(&p, &left, '\n');
3187                while (!err && left) {
3188                        unsigned long val_a, val_b;
3189                        bool neg;
3190                        size_t saved_left;
3191
3192                        /* In case we stop parsing mid-number, we can reset */
3193                        saved_left = left;
3194                        err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3195                                             sizeof(tr_a), &c);
3196                        /*
3197                         * If we consumed the entirety of a truncated buffer or
3198                         * only one char is left (may be a "-"), then stop here,
3199                         * reset, & come back for more.
3200                         */
3201                        if ((left <= 1) && skipped) {
3202                                left = saved_left;
3203                                break;
3204                        }
3205
3206                        if (err)
3207                                break;
3208                        if (val_a >= bitmap_len || neg) {
3209                                err = -EINVAL;
3210                                break;
3211                        }
3212
                        /* A lone value is the range [val_a, val_a]. */
3213                        val_b = val_a;
                        /* NOTE(review): presumably steps over the terminator reported in c - confirm. */
3214                        if (left) {
3215                                p++;
3216                                left--;
3217                        }
3218
3219                        if (c == '-') {
3220                                err = proc_get_long(&p, &left, &val_b,
3221                                                     &neg, tr_b, sizeof(tr_b),
3222                                                     &c);
3223                                /*
3224                                 * If we consumed all of a truncated buffer,
3225                                 * then stop here, reset, & come back for more.
3226                                 */
3227                                if (!left && skipped) {
3228                                        left = saved_left;
3229                                        break;
3230                                }
3231
3232                                if (err)
3233                                        break;
3234                                if (val_b >= bitmap_len || neg ||
3235                                    val_a > val_b) {
3236                                        err = -EINVAL;
3237                                        break;
3238                                }
3239                                if (left) {
3240                                        p++;
3241                                        left--;
3242                                }
3243                        }
3244
3245                        bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3246                        first = 0;
3247                        proc_skip_char(&p, &left, '\n');
3248                }
3249                kfree(kbuf);
                /* Bytes beyond the parsed window count as not consumed. */
3250                left += skipped;
3251        } else {
3252                unsigned long bit_a, bit_b = 0;
3253
                /* Emit each run of set bits as "a" or "a-b", comma separated. */
3254                while (left) {
3255                        bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3256                        if (bit_a >= bitmap_len)
3257                                break;
                        /* Last set bit of the run that starts at bit_a. */
3258                        bit_b = find_next_zero_bit(bitmap, bitmap_len,
3259                                                   bit_a + 1) - 1;
3260
3261                        if (!first) {
3262                                err = proc_put_char(&buffer, &left, ',');
3263                                if (err)
3264                                        break;
3265                        }
3266                        err = proc_put_long(&buffer, &left, bit_a, false);
3267                        if (err)
3268                                break;
3269                        if (bit_a != bit_b) {
3270                                err = proc_put_char(&buffer, &left, '-');
3271                                if (err)
3272                                        break;
3273                                err = proc_put_long(&buffer, &left, bit_b, false);
3274                                if (err)
3275                                        break;
3276                        }
3277
3278                        first = 0; bit_b++;
3279                }
3280                if (!err)
3281                        err = proc_put_char(&buffer, &left, '\n');
3282        }
3283
3284        if (!err) {
3285                if (write) {
                        /*
                         * A write at a non-zero position adds to the bitmap;
                         * a write at position zero replaces it.
                         */
3286                        if (*ppos)
3287                                bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3288                        else
3289                                bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3290                }
3291                *lenp -= left;
3292                *ppos += *lenp;
3293        }
3294
        /* tmp_bitmap is still NULL on the read path. */
3295        bitmap_free(tmp_bitmap);
3296        return err;
3297}
3298
3299#else /* CONFIG_PROC_SYSCTL */
3300
3301int proc_dostring(struct ctl_table *table, int write,
3302                  void __user *buffer, size_t *lenp, loff_t *ppos)
3303{
3304        return -ENOSYS;
3305}
3306
3307int proc_dointvec(struct ctl_table *table, int write,
3308                  void __user *buffer, size_t *lenp, loff_t *ppos)
3309{
3310        return -ENOSYS;
3311}
3312
3313int proc_douintvec(struct ctl_table *table, int write,
3314                  void __user *buffer, size_t *lenp, loff_t *ppos)
3315{
3316        return -ENOSYS;
3317}
3318
3319int proc_dointvec_minmax(struct ctl_table *table, int write,
3320                    void __user *buffer, size_t *lenp, loff_t *ppos)
3321{
3322        return -ENOSYS;
3323}
3324
3325int proc_douintvec_minmax(struct ctl_table *table, int write,
3326                          void __user *buffer, size_t *lenp, loff_t *ppos)
3327{
3328        return -ENOSYS;
3329}
3330
3331int proc_dointvec_jiffies(struct ctl_table *table, int write,
3332                    void __user *buffer, size_t *lenp, loff_t *ppos)
3333{
3334        return -ENOSYS;
3335}
3336
3337int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3338                    void __user *buffer, size_t *lenp, loff_t *ppos)
3339{
3340        return -ENOSYS;
3341}
3342
3343int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3344                             void __user *buffer, size_t *lenp, loff_t *ppos)
3345{
3346        return -ENOSYS;
3347}
3348
3349int proc_doulongvec_minmax(struct ctl_table *table, int write,
3350                    void __user *buffer, size_t *lenp, loff_t *ppos)
3351{
3352        return -ENOSYS;
3353}
3354
3355int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3356                                      void __user *buffer,
3357                                      size_t *lenp, loff_t *ppos)
3358{
3359    return -ENOSYS;
3360}
3361
3362int proc_do_large_bitmap(struct ctl_table *table, int write,
3363                         void __user *buffer, size_t *lenp, loff_t *ppos)
3364{
3365        return -ENOSYS;
3366}
3367
3368#endif /* CONFIG_PROC_SYSCTL */
3369
3370#if defined(CONFIG_SYSCTL)
3371int proc_do_static_key(struct ctl_table *table, int write,
3372                       void __user *buffer, size_t *lenp,
3373                       loff_t *ppos)
3374{
3375        struct static_key *key = (struct static_key *)table->data;
3376        static DEFINE_MUTEX(static_key_mutex);
3377        int val, ret;
3378        struct ctl_table tmp = {
3379                .data   = &val,
3380                .maxlen = sizeof(val),
3381                .mode   = table->mode,
3382                .extra1 = &zero,
3383                .extra2 = &one,
3384        };
3385
3386        if (write && !capable(CAP_SYS_ADMIN))
3387                return -EPERM;
3388
3389        mutex_lock(&static_key_mutex);
3390        val = static_key_enabled(key);
3391        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3392        if (write && !ret) {
3393                if (val)
3394                        static_key_enable(key);
3395                else
3396                        static_key_disable(key);
3397        }
3398        mutex_unlock(&static_key_mutex);
3399        return ret;
3400}
3401#endif
/*
 * The exports for the proc handlers are gathered here in one place instead
 * of following each definition: the handlers are defined twice in this file,
 * so per-definition exports would have to be repeated (exception granted :-).
 */

/* string handler */
EXPORT_SYMBOL(proc_dostring);

/* int / unsigned int vector handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);

/* unsigned long vector handlers */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* bitmap handler */
EXPORT_SYMBOL(proc_do_large_bitmap);
3417