linux/kernel/sysctl.c
<<
>>
Prefs
   1/*
   2 * sysctl.c: General linux system control interface
   3 *
   4 * Begun 24 March 1995, Stephen Tweedie
   5 * Added /proc support, Dec 1995
   6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
   9 * Dynamic registration fixes, Stephen Tweedie.
  10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  12 *  Horn.
  13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  16 *  Wendling.
  17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  18 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/mm.h>
  23#include <linux/swap.h>
  24#include <linux/slab.h>
  25#include <linux/sysctl.h>
  26#include <linux/bitmap.h>
  27#include <linux/signal.h>
  28#include <linux/printk.h>
  29#include <linux/proc_fs.h>
  30#include <linux/security.h>
  31#include <linux/ctype.h>
  32#include <linux/kmemcheck.h>
  33#include <linux/kmemleak.h>
  34#include <linux/fs.h>
  35#include <linux/init.h>
  36#include <linux/kernel.h>
  37#include <linux/kobject.h>
  38#include <linux/net.h>
  39#include <linux/sysrq.h>
  40#include <linux/highuid.h>
  41#include <linux/writeback.h>
  42#include <linux/ratelimit.h>
  43#include <linux/compaction.h>
  44#include <linux/hugetlb.h>
  45#include <linux/initrd.h>
  46#include <linux/key.h>
  47#include <linux/times.h>
  48#include <linux/limits.h>
  49#include <linux/dcache.h>
  50#include <linux/dnotify.h>
  51#include <linux/syscalls.h>
  52#include <linux/vmstat.h>
  53#include <linux/nfs_fs.h>
  54#include <linux/acpi.h>
  55#include <linux/reboot.h>
  56#include <linux/ftrace.h>
  57#include <linux/perf_event.h>
  58#include <linux/kprobes.h>
  59#include <linux/pipe_fs_i.h>
  60#include <linux/oom.h>
  61#include <linux/kmod.h>
  62#include <linux/capability.h>
  63#include <linux/binfmts.h>
  64#include <linux/sched/sysctl.h>
  65#include <linux/kexec.h>
  66
  67#include <asm/uaccess.h>
  68#include <asm/processor.h>
  69
  70#ifdef CONFIG_X86
  71#include <asm/nmi.h>
  72#include <asm/stacktrace.h>
  73#include <asm/io.h>
  74#endif
  75#ifdef CONFIG_SPARC
  76#include <asm/setup.h>
  77#endif
  78#ifdef CONFIG_BSD_PROCESS_ACCT
  79#include <linux/acct.h>
  80#endif
  81#ifdef CONFIG_RT_MUTEXES
  82#include <linux/rtmutex.h>
  83#endif
  84#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
  85#include <linux/lockdep.h>
  86#endif
  87#ifdef CONFIG_CHR_DEV_SG
  88#include <scsi/sg.h>
  89#endif
  90
  91#ifdef CONFIG_LOCKUP_DETECTOR
  92#include <linux/nmi.h>
  93#endif
  94
  95
  96#if defined(CONFIG_SYSCTL)
  97
  98/*
     * External variables not in a header file.
     *
     * No header declares these objects, so they are declared here by hand to
     * let the sysctl tables below take their addresses.  Where a consumer is
     * visible in kern_table it is named next to the declaration.
     */
  99extern int max_threads;             /* .data of "threads-max" */
 100extern int suid_dumpable;
 101#ifdef CONFIG_COREDUMP
 102extern int core_uses_pid;           /* .data of "core_uses_pid" */
 103extern char core_pattern[];         /* .data of "core_pattern" */
 104extern unsigned int core_pipe_limit; /* .data of "core_pipe_limit" */
 105#endif
 106extern int pid_max;                 /* .data of "pid_max" */
 107extern int pid_max_min, pid_max_max; /* .extra1 / .extra2 of "pid_max" */
 108extern int percpu_pagelist_fraction;
 109extern int compat_log;
 110extern int latencytop_enabled;      /* .data of "latencytop" (CONFIG_LATENCYTOP) */
 111extern int sysctl_nr_open_min, sysctl_nr_open_max;
 112#ifndef CONFIG_MMU
 113extern int sysctl_nr_trim_pages;
 114#endif
 115
 116/*
     * Constants used for minimum and maximum.
     *
     * Table entries store the address of a bound in .extra1 (lower) and
     * .extra2 (upper), so every bound has to exist as an object rather than
     * as a literal.  Consumers visible in kern_table are named below.
     */
 117#ifdef CONFIG_LOCKUP_DETECTOR
 118static int sixty = 60;              /* .extra2 of "watchdog_thresh" */
 119#endif
 120
 121static int __maybe_unused neg_one = -1; /* .extra1 of "sysctl_writes_strict" */
 122
 123static int zero;                    /* no initialiser: static storage starts at 0 */
 124static int __maybe_unused one = 1;
 125static int __maybe_unused two = 2;  /* .extra2 of "kptr_restrict" */
 126static int __maybe_unused four = 4;
 127static unsigned long one_ul = 1;
 128static int one_hundred = 100;
 129#ifdef CONFIG_PRINTK
 130static int ten_thousand = 10000;    /* .extra2 of "printk_delay" */
 131#endif
 132
 133/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 134static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 135
 136/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 137static int maxolduid = 65535;
 138static int minolduid;               /* no initialiser: lower bound is 0 */
 139
    /*
     * Not bounds: these two are the .data of the "ngroups_max" and
     * "cap_last_cap" entries, both of which have mode 0444.  cap_last_cap is
     * const, so its table entry casts the qualifier away to fit .data.
     */
 140static int ngroups_max = NGROUPS_MAX;
 141static const int cap_last_cap = CAP_LAST_CAP;
 142
 143/* this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
 144#ifdef CONFIG_DETECT_HUNG_TASK
 145static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 146#endif
 147
 148#ifdef CONFIG_INOTIFY_USER
 149#include <linux/inotify.h>
 150#endif
 151#ifdef CONFIG_SPARC
 152#endif
    /*
     * NOTE(review): the CONFIG_SPARC conditional above has no body; it looks
     * like a leftover from removed declarations - confirm before deleting.
     */
 153
    /*
     * Architecture-specific flags declared by hand.  Each one is only
     * declared when the matching architecture/config symbol is defined.
     */
 154#ifdef __hppa__
 155extern int pwrsw_enabled;           /* .data of "soft-power" */
 156#endif
 157
 158#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 159extern int unaligned_enabled;       /* .data of "unaligned-trap" */
 160#endif
 161
 162#ifdef CONFIG_IA64
 163extern int unaligned_dump_stack;
 164#endif
 165
 166#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
 167extern int no_unaligned_warning;
 168#endif
 169
 170#ifdef CONFIG_PROC_SYSCTL
 171
    /*
     * Values accepted by the "sysctl_writes_strict" entry of kern_table, which
     * is range-checked against -1..1.  What each mode changes is decided by
     * the write handlers, which are outside the part of the file shown here.
     */
 172#define SYSCTL_WRITES_LEGACY    -1
 173#define SYSCTL_WRITES_WARN       0
 174#define SYSCTL_WRITES_STRICT     1
 175
    /* Current mode; starts out as SYSCTL_WRITES_WARN. */
 176static int sysctl_writes_strict = SYSCTL_WRITES_WARN;
 177
    /*
     * Forward declarations of handlers that kern_table refers to before
     * their definitions: proc_do_cad_pid serves "cad_pid" and proc_taint
     * serves "tainted".
     */
 178static int proc_do_cad_pid(struct ctl_table *table, int write,
 179                  void __user *buffer, size_t *lenp, loff_t *ppos);
 180static int proc_taint(struct ctl_table *table, int write,
 181                               void __user *buffer, size_t *lenp, loff_t *ppos);
 182#endif
 183
 184#ifdef CONFIG_PRINTK
    /* Handler of the "dmesg_restrict" and "kptr_restrict" entries. */
 185static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 186                                void __user *buffer, size_t *lenp, loff_t *ppos);
 187#endif
 188
    /*
     * Declared unconditionally, unlike proc_dostring_coredump below.  No
     * entry in the visible part of kern_table uses it.
     */
 189static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
 190                void __user *buffer, size_t *lenp, loff_t *ppos);
 191#ifdef CONFIG_COREDUMP
    /* Handler of the "core_pattern" entry. */
 192static int proc_dostring_coredump(struct ctl_table *table, int write,
 193                void __user *buffer, size_t *lenp, loff_t *ppos);
 194#endif
 195
 196#ifdef CONFIG_MAGIC_SYSRQ
 197/*
     * Value behind the "sysrq" entry of kern_table, initialised from
     * CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE.
     * Note: sysrq code uses its own private copy, so sysrq_sysctl_handler()
     * passes every newly written value on to sysrq_toggle_support().
     */
 198static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 199
    /*
     * sysrq_sysctl_handler - proc handler of the "sysrq" sysctl entry.
     * @table:  table entry being accessed; forwarded to proc_dointvec()
     * @write:  non-zero when the access is a write
     * @buffer: user buffer; forwarded to proc_dointvec()
     * @lenp:   length pointer; forwarded to proc_dointvec()
     * @ppos:   file position pointer; forwarded to proc_dointvec()
     *
     * Lets proc_dointvec() do the integer read or write, then, for writes
     * only, hands the current __sysrq_enabled value to
     * sysrq_toggle_support().
     *
     * The global is read directly rather than through @table, so this
     * handler is only meaningful for an entry whose .data is
     * &__sysrq_enabled, as the "sysrq" entry in kern_table is.
     *
     * Returns the non-zero result of proc_dointvec() if it failed, else 0.
     */
 200static int sysrq_sysctl_handler(struct ctl_table *table, int write,
 201                                void __user *buffer, size_t *lenp,
 202                                loff_t *ppos)
 203{
 204        int error;
 205
 206        error = proc_dointvec(table, write, buffer, lenp, ppos);
 207        if (error)
 208                return error;
 209
            /* Reads change nothing, so only a write needs to be propagated. */
 210        if (write)
 211                sysrq_toggle_support(__sysrq_enabled);
 212
 213        return 0;
 214}
 215
 216#endif
 217
 218static struct ctl_table kern_table[];        /* child of "kernel"; defined below */
 219static struct ctl_table vm_table[];          /* child of "vm" */
 220static struct ctl_table fs_table[];          /* child of "fs" */
 221static struct ctl_table debug_table[];       /* child of "debug" */
 222static struct ctl_table dev_table[];         /* child of "dev" */
    /*
     * The five static tables above are declared ahead of their definitions
     * so that sysctl_base_table can reference them as .child.
     * random_table is declared extern and is used as the child of
     * kern_table's "random" entry.
     */
 223extern struct ctl_table random_table[];
 224#ifdef CONFIG_EPOLL
 225extern struct ctl_table epoll_table[];
 226#endif
 227
 228#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
    /*
     * Not static: this is a definition with external linkage.  No table
     * entry in the visible part of the file refers to it.
     */
 229int sysctl_legacy_va_layout;
 230#endif
 231
 232/*
     * The default sysctl tables:
     *
     * sysctl_base_table lists the five top-level directories "kernel", "vm",
     * "fs", "debug" and "dev".  Each entry carries only a name, mode 0555
     * (read and search for everyone, write for nobody) and a .child table;
     * none of them has .data or a .proc_handler of its own.
     */
 233
 234static struct ctl_table sysctl_base_table[] = {
 235        {
 236                .procname       = "kernel",
 237                .mode           = 0555,
 238                .child          = kern_table,
 239        },
 240        {
 241                .procname       = "vm",
 242                .mode           = 0555,
 243                .child          = vm_table,
 244        },
 245        {
 246                .procname       = "fs",
 247                .mode           = 0555,
 248                .child          = fs_table,
 249        },
 250        {
 251                .procname       = "debug",
 252                .mode           = 0555,
 253                .child          = debug_table,
 254        },
 255        {
 256                .procname       = "dev",
 257                .mode           = 0555,
 258                .child          = dev_table,
 259        },
            /* All-zero entry; presumably the end-of-table marker - confirm against the table walker. */
 260        { }
 261};
 262
 263#ifdef CONFIG_SCHED_DEBUG
    /*
     * Bounds for the scheduler tunables in kern_table.  The granularity pair
     * is shared by "sched_min_granularity_ns" and "sched_latency_ns"; the
     * wakeup pair bounds "sched_wakeup_granularity_ns".
     */
 264static int min_sched_granularity_ns = 100000;           /* 100 usecs */
 265static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
 266static int min_wakeup_granularity_ns;                   /* 0 usecs */
 267static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
 268#ifdef CONFIG_SMP
    /* Bounds for "sched_tunable_scaling": first enumerator up to the one before _END. */
 269static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 270static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
 271#endif /* CONFIG_SMP */
 272#endif /* CONFIG_SCHED_DEBUG */
 273
 274#ifdef CONFIG_COMPACTION
    /*
     * Range 0..1000 (the minimum has no initialiser, so it starts at 0).
     * The entry that uses these bounds is not in the visible part of the file.
     */
 275static int min_extfrag_threshold;
 276static int max_extfrag_threshold = 1000;
 277#endif
 278
 279static struct ctl_table kern_table[] = {
 280        {
 281                .procname       = "sched_child_runs_first",
 282                .data           = &sysctl_sched_child_runs_first,
 283                .maxlen         = sizeof(unsigned int),
 284                .mode           = 0644,
 285                .proc_handler   = proc_dointvec,
 286        },
 287#ifdef CONFIG_SCHED_DEBUG
 288        {
 289                .procname       = "sched_min_granularity_ns",
 290                .data           = &sysctl_sched_min_granularity,
 291                .maxlen         = sizeof(unsigned int),
 292                .mode           = 0644,
 293                .proc_handler   = sched_proc_update_handler,
 294                .extra1         = &min_sched_granularity_ns,
 295                .extra2         = &max_sched_granularity_ns,
 296        },
 297        {
 298                .procname       = "sched_latency_ns",
 299                .data           = &sysctl_sched_latency,
 300                .maxlen         = sizeof(unsigned int),
 301                .mode           = 0644,
 302                .proc_handler   = sched_proc_update_handler,
 303                .extra1         = &min_sched_granularity_ns,
 304                .extra2         = &max_sched_granularity_ns,
 305        },
 306        {
 307                .procname       = "sched_wakeup_granularity_ns",
 308                .data           = &sysctl_sched_wakeup_granularity,
 309                .maxlen         = sizeof(unsigned int),
 310                .mode           = 0644,
 311                .proc_handler   = sched_proc_update_handler,
 312                .extra1         = &min_wakeup_granularity_ns,
 313                .extra2         = &max_wakeup_granularity_ns,
 314        },
 315#ifdef CONFIG_SMP
 316        {
 317                .procname       = "sched_tunable_scaling",
 318                .data           = &sysctl_sched_tunable_scaling,
 319                .maxlen         = sizeof(enum sched_tunable_scaling),
 320                .mode           = 0644,
 321                .proc_handler   = sched_proc_update_handler,
 322                .extra1         = &min_sched_tunable_scaling,
 323                .extra2         = &max_sched_tunable_scaling,
 324        },
 325        {
 326                .procname       = "sched_migration_cost_ns",
 327                .data           = &sysctl_sched_migration_cost,
 328                .maxlen         = sizeof(unsigned int),
 329                .mode           = 0644,
 330                .proc_handler   = proc_dointvec,
 331        },
 332        {
 333                .procname       = "sched_nr_migrate",
 334                .data           = &sysctl_sched_nr_migrate,
 335                .maxlen         = sizeof(unsigned int),
 336                .mode           = 0644,
 337                .proc_handler   = proc_dointvec,
 338        },
 339        {
 340                .procname       = "sched_time_avg_ms",
 341                .data           = &sysctl_sched_time_avg,
 342                .maxlen         = sizeof(unsigned int),
 343                .mode           = 0644,
 344                .proc_handler   = proc_dointvec,
 345        },
 346        {
 347                .procname       = "sched_shares_window_ns",
 348                .data           = &sysctl_sched_shares_window,
 349                .maxlen         = sizeof(unsigned int),
 350                .mode           = 0644,
 351                .proc_handler   = proc_dointvec,
 352        },
 353        {
 354                .procname       = "timer_migration",
 355                .data           = &sysctl_timer_migration,
 356                .maxlen         = sizeof(unsigned int),
 357                .mode           = 0644,
 358                .proc_handler   = proc_dointvec_minmax,
 359                .extra1         = &zero,
 360                .extra2         = &one,
 361        },
 362#endif /* CONFIG_SMP */
 363#ifdef CONFIG_NUMA_BALANCING
 364        {
 365                .procname       = "numa_balancing_scan_delay_ms",
 366                .data           = &sysctl_numa_balancing_scan_delay,
 367                .maxlen         = sizeof(unsigned int),
 368                .mode           = 0644,
 369                .proc_handler   = proc_dointvec,
 370        },
 371        {
 372                .procname       = "numa_balancing_scan_period_min_ms",
 373                .data           = &sysctl_numa_balancing_scan_period_min,
 374                .maxlen         = sizeof(unsigned int),
 375                .mode           = 0644,
 376                .proc_handler   = proc_dointvec,
 377        },
 378        {
 379                .procname       = "numa_balancing_scan_period_max_ms",
 380                .data           = &sysctl_numa_balancing_scan_period_max,
 381                .maxlen         = sizeof(unsigned int),
 382                .mode           = 0644,
 383                .proc_handler   = proc_dointvec,
 384        },
 385        {
 386                .procname       = "numa_balancing_scan_size_mb",
 387                .data           = &sysctl_numa_balancing_scan_size,
 388                .maxlen         = sizeof(unsigned int),
 389                .mode           = 0644,
 390                .proc_handler   = proc_dointvec,
 391        },
 392        {
 393                .procname       = "numa_balancing",
 394                .data           = NULL, /* filled in by handler */
 395                .maxlen         = sizeof(unsigned int),
 396                .mode           = 0644,
 397                .proc_handler   = sysctl_numa_balancing,
 398                .extra1         = &zero,
 399                .extra2         = &one,
 400        },
 401#endif /* CONFIG_NUMA_BALANCING */
 402#endif /* CONFIG_SCHED_DEBUG */
 403        {
 404                .procname       = "sched_rt_period_us",
 405                .data           = &sysctl_sched_rt_period,
 406                .maxlen         = sizeof(unsigned int),
 407                .mode           = 0644,
 408                .proc_handler   = sched_rt_handler,
 409        },
 410        {
 411                .procname       = "sched_rt_runtime_us",
 412                .data           = &sysctl_sched_rt_runtime,
 413                .maxlen         = sizeof(int),
 414                .mode           = 0644,
 415                .proc_handler   = sched_rt_handler,
 416        },
 417        {
 418                .procname       = "sched_rr_timeslice_ms",
 419                .data           = &sched_rr_timeslice,
 420                .maxlen         = sizeof(int),
 421                .mode           = 0644,
 422                .proc_handler   = sched_rr_handler,
 423        },
 424#ifdef CONFIG_SCHED_AUTOGROUP
 425        {
 426                .procname       = "sched_autogroup_enabled",
 427                .data           = &sysctl_sched_autogroup_enabled,
 428                .maxlen         = sizeof(unsigned int),
 429                .mode           = 0644,
 430                .proc_handler   = proc_dointvec_minmax,
 431                .extra1         = &zero,
 432                .extra2         = &one,
 433        },
 434#endif
 435#ifdef CONFIG_CFS_BANDWIDTH
 436        {
 437                .procname       = "sched_cfs_bandwidth_slice_us",
 438                .data           = &sysctl_sched_cfs_bandwidth_slice,
 439                .maxlen         = sizeof(unsigned int),
 440                .mode           = 0644,
 441                .proc_handler   = proc_dointvec_minmax,
 442                .extra1         = &one,
 443        },
 444#endif
 445#ifdef CONFIG_PROVE_LOCKING
 446        {
 447                .procname       = "prove_locking",
 448                .data           = &prove_locking,
 449                .maxlen         = sizeof(int),
 450                .mode           = 0644,
 451                .proc_handler   = proc_dointvec,
 452        },
 453#endif
 454#ifdef CONFIG_LOCK_STAT
 455        {
 456                .procname       = "lock_stat",
 457                .data           = &lock_stat,
 458                .maxlen         = sizeof(int),
 459                .mode           = 0644,
 460                .proc_handler   = proc_dointvec,
 461        },
 462#endif
 463        {
 464                .procname       = "panic",
 465                .data           = &panic_timeout,
 466                .maxlen         = sizeof(int),
 467                .mode           = 0644,
 468                .proc_handler   = proc_dointvec,
 469        },
 470#ifdef CONFIG_COREDUMP
 471        {
 472                .procname       = "core_uses_pid",
 473                .data           = &core_uses_pid,
 474                .maxlen         = sizeof(int),
 475                .mode           = 0644,
 476                .proc_handler   = proc_dointvec,
 477        },
 478        {
 479                .procname       = "core_pattern",
 480                .data           = core_pattern,
 481                .maxlen         = CORENAME_MAX_SIZE,
 482                .mode           = 0644,
 483                .proc_handler   = proc_dostring_coredump,
 484        },
 485        {
 486                .procname       = "core_pipe_limit",
 487                .data           = &core_pipe_limit,
 488                .maxlen         = sizeof(unsigned int),
 489                .mode           = 0644,
 490                .proc_handler   = proc_dointvec,
 491        },
 492#endif
 493#ifdef CONFIG_PROC_SYSCTL
 494        {
 495                .procname       = "tainted",
 496                .maxlen         = sizeof(long),
 497                .mode           = 0644,
 498                .proc_handler   = proc_taint,
 499        },
 500        {
 501                .procname       = "sysctl_writes_strict",
 502                .data           = &sysctl_writes_strict,
 503                .maxlen         = sizeof(int),
 504                .mode           = 0644,
 505                .proc_handler   = proc_dointvec_minmax,
 506                .extra1         = &neg_one,
 507                .extra2         = &one,
 508        },
 509#endif
 510#ifdef CONFIG_LATENCYTOP
 511        {
 512                .procname       = "latencytop",
 513                .data           = &latencytop_enabled,
 514                .maxlen         = sizeof(int),
 515                .mode           = 0644,
 516                .proc_handler   = proc_dointvec,
 517        },
 518#endif
 519#ifdef CONFIG_BLK_DEV_INITRD
 520        {
 521                .procname       = "real-root-dev",
 522                .data           = &real_root_dev,
 523                .maxlen         = sizeof(int),
 524                .mode           = 0644,
 525                .proc_handler   = proc_dointvec,
 526        },
 527#endif
 528        {
 529                .procname       = "print-fatal-signals",
 530                .data           = &print_fatal_signals,
 531                .maxlen         = sizeof(int),
 532                .mode           = 0644,
 533                .proc_handler   = proc_dointvec,
 534        },
 535#ifdef CONFIG_SPARC
 536        {
 537                .procname       = "reboot-cmd",
 538                .data           = reboot_command,
 539                .maxlen         = 256,
 540                .mode           = 0644,
 541                .proc_handler   = proc_dostring,
 542        },
 543        {
 544                .procname       = "stop-a",
 545                .data           = &stop_a_enabled,
 546                .maxlen         = sizeof (int),
 547                .mode           = 0644,
 548                .proc_handler   = proc_dointvec,
 549        },
 550        {
 551                .procname       = "scons-poweroff",
 552                .data           = &scons_pwroff,
 553                .maxlen         = sizeof (int),
 554                .mode           = 0644,
 555                .proc_handler   = proc_dointvec,
 556        },
 557#endif
 558#ifdef CONFIG_SPARC64
 559        {
 560                .procname       = "tsb-ratio",
 561                .data           = &sysctl_tsb_ratio,
 562                .maxlen         = sizeof (int),
 563                .mode           = 0644,
 564                .proc_handler   = proc_dointvec,
 565        },
 566#endif
 567#ifdef __hppa__
 568        {
 569                .procname       = "soft-power",
 570                .data           = &pwrsw_enabled,
 571                .maxlen         = sizeof (int),
 572                .mode           = 0644,
 573                .proc_handler   = proc_dointvec,
 574        },
 575#endif
 576#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 577        {
 578                .procname       = "unaligned-trap",
 579                .data           = &unaligned_enabled,
 580                .maxlen         = sizeof (int),
 581                .mode           = 0644,
 582                .proc_handler   = proc_dointvec,
 583        },
 584#endif
 585        {
 586                .procname       = "ctrl-alt-del",
 587                .data           = &C_A_D,
 588                .maxlen         = sizeof(int),
 589                .mode           = 0644,
 590                .proc_handler   = proc_dointvec,
 591        },
 592#ifdef CONFIG_FUNCTION_TRACER
 593        {
 594                .procname       = "ftrace_enabled",
 595                .data           = &ftrace_enabled,
 596                .maxlen         = sizeof(int),
 597                .mode           = 0644,
 598                .proc_handler   = ftrace_enable_sysctl,
 599        },
 600#endif
 601#ifdef CONFIG_STACK_TRACER
 602        {
 603                .procname       = "stack_tracer_enabled",
 604                .data           = &stack_tracer_enabled,
 605                .maxlen         = sizeof(int),
 606                .mode           = 0644,
 607                .proc_handler   = stack_trace_sysctl,
 608        },
 609#endif
 610#ifdef CONFIG_TRACING
 611        {
 612                .procname       = "ftrace_dump_on_oops",
 613                .data           = &ftrace_dump_on_oops,
 614                .maxlen         = sizeof(int),
 615                .mode           = 0644,
 616                .proc_handler   = proc_dointvec,
 617        },
 618        {
 619                .procname       = "traceoff_on_warning",
 620                .data           = &__disable_trace_on_warning,
 621                .maxlen         = sizeof(__disable_trace_on_warning),
 622                .mode           = 0644,
 623                .proc_handler   = proc_dointvec,
 624        },
 625#endif
 626#ifdef CONFIG_KEXEC
 627        {
 628                .procname       = "kexec_load_disabled",
 629                .data           = &kexec_load_disabled,
 630                .maxlen         = sizeof(int),
 631                .mode           = 0644,
 632                /* only handle a transition from default "0" to "1" */
 633                .proc_handler   = proc_dointvec_minmax,
 634                .extra1         = &one,
 635                .extra2         = &one,
 636        },
 637#endif
 638#ifdef CONFIG_MODULES
 639        {
 640                .procname       = "modprobe",
 641                .data           = &modprobe_path,
 642                .maxlen         = KMOD_PATH_LEN,
 643                .mode           = 0644,
 644                .proc_handler   = proc_dostring,
 645        },
 646        {
 647                .procname       = "modules_disabled",
 648                .data           = &modules_disabled,
 649                .maxlen         = sizeof(int),
 650                .mode           = 0644,
 651                /* only handle a transition from default "0" to "1" */
 652                .proc_handler   = proc_dointvec_minmax,
 653                .extra1         = &one,
 654                .extra2         = &one,
 655        },
 656#endif
 657#ifdef CONFIG_UEVENT_HELPER
 658        {
 659                .procname       = "hotplug",
 660                .data           = &uevent_helper,
 661                .maxlen         = UEVENT_HELPER_PATH_LEN,
 662                .mode           = 0644,
 663                .proc_handler   = proc_dostring,
 664        },
 665#endif
 666#ifdef CONFIG_CHR_DEV_SG
 667        {
 668                .procname       = "sg-big-buff",
 669                .data           = &sg_big_buff,
 670                .maxlen         = sizeof (int),
 671                .mode           = 0444,
 672                .proc_handler   = proc_dointvec,
 673        },
 674#endif
 675#ifdef CONFIG_BSD_PROCESS_ACCT
 676        {
 677                .procname       = "acct",
 678                .data           = &acct_parm,
 679                .maxlen         = 3*sizeof(int),
 680                .mode           = 0644,
 681                .proc_handler   = proc_dointvec,
 682        },
 683#endif
 684#ifdef CONFIG_MAGIC_SYSRQ
 685        {
 686                .procname       = "sysrq",
 687                .data           = &__sysrq_enabled,
 688                .maxlen         = sizeof (int),
 689                .mode           = 0644,
 690                .proc_handler   = sysrq_sysctl_handler,
 691        },
 692#endif
 693#ifdef CONFIG_PROC_SYSCTL
 694        {
 695                .procname       = "cad_pid",
 696                .data           = NULL,
 697                .maxlen         = sizeof (int),
 698                .mode           = 0600,
 699                .proc_handler   = proc_do_cad_pid,
 700        },
 701#endif
 702        {
 703                .procname       = "threads-max",
 704                .data           = &max_threads,
 705                .maxlen         = sizeof(int),
 706                .mode           = 0644,
 707                .proc_handler   = proc_dointvec,
 708        },
 709        {
 710                .procname       = "random",
 711                .mode           = 0555,
 712                .child          = random_table,
 713        },
 714        {
 715                .procname       = "usermodehelper",
 716                .mode           = 0555,
 717                .child          = usermodehelper_table,
 718        },
 719        {
 720                .procname       = "overflowuid",
 721                .data           = &overflowuid,
 722                .maxlen         = sizeof(int),
 723                .mode           = 0644,
 724                .proc_handler   = proc_dointvec_minmax,
 725                .extra1         = &minolduid,
 726                .extra2         = &maxolduid,
 727        },
 728        {
 729                .procname       = "overflowgid",
 730                .data           = &overflowgid,
 731                .maxlen         = sizeof(int),
 732                .mode           = 0644,
 733                .proc_handler   = proc_dointvec_minmax,
 734                .extra1         = &minolduid,
 735                .extra2         = &maxolduid,
 736        },
 737#ifdef CONFIG_S390
 738#ifdef CONFIG_MATHEMU
 739        {
 740                .procname       = "ieee_emulation_warnings",
 741                .data           = &sysctl_ieee_emulation_warnings,
 742                .maxlen         = sizeof(int),
 743                .mode           = 0644,
 744                .proc_handler   = proc_dointvec,
 745        },
 746#endif
 747        {
 748                .procname       = "userprocess_debug",
 749                .data           = &show_unhandled_signals,
 750                .maxlen         = sizeof(int),
 751                .mode           = 0644,
 752                .proc_handler   = proc_dointvec,
 753        },
 754#endif
 755        {
 756                .procname       = "pid_max",
 757                .data           = &pid_max,
 758                .maxlen         = sizeof (int),
 759                .mode           = 0644,
 760                .proc_handler   = proc_dointvec_minmax,
 761                .extra1         = &pid_max_min,
 762                .extra2         = &pid_max_max,
 763        },
 764        {
 765                .procname       = "panic_on_oops",
 766                .data           = &panic_on_oops,
 767                .maxlen         = sizeof(int),
 768                .mode           = 0644,
 769                .proc_handler   = proc_dointvec,
 770        },
 771#if defined CONFIG_PRINTK
 772        {
 773                .procname       = "printk",
 774                .data           = &console_loglevel,
 775                .maxlen         = 4*sizeof(int),
 776                .mode           = 0644,
 777                .proc_handler   = proc_dointvec,
 778        },
 779        {
 780                .procname       = "printk_ratelimit",
 781                .data           = &printk_ratelimit_state.interval,
 782                .maxlen         = sizeof(int),
 783                .mode           = 0644,
 784                .proc_handler   = proc_dointvec_jiffies,
 785        },
 786        {
 787                .procname       = "printk_ratelimit_burst",
 788                .data           = &printk_ratelimit_state.burst,
 789                .maxlen         = sizeof(int),
 790                .mode           = 0644,
 791                .proc_handler   = proc_dointvec,
 792        },
 793        {
 794                .procname       = "printk_delay",
 795                .data           = &printk_delay_msec,
 796                .maxlen         = sizeof(int),
 797                .mode           = 0644,
 798                .proc_handler   = proc_dointvec_minmax,
 799                .extra1         = &zero,
 800                .extra2         = &ten_thousand,
 801        },
 802        {
 803                .procname       = "dmesg_restrict",
 804                .data           = &dmesg_restrict,
 805                .maxlen         = sizeof(int),
 806                .mode           = 0644,
 807                .proc_handler   = proc_dointvec_minmax_sysadmin,
 808                .extra1         = &zero,
 809                .extra2         = &one,
 810        },
 811        {
 812                .procname       = "kptr_restrict",
 813                .data           = &kptr_restrict,
 814                .maxlen         = sizeof(int),
 815                .mode           = 0644,
 816                .proc_handler   = proc_dointvec_minmax_sysadmin,
 817                .extra1         = &zero,
 818                .extra2         = &two,
 819        },
 820#endif
 821        {
 822                .procname       = "ngroups_max",
 823                .data           = &ngroups_max,
 824                .maxlen         = sizeof (int),
 825                .mode           = 0444,
 826                .proc_handler   = proc_dointvec,
 827        },
 828        {
 829                .procname       = "cap_last_cap",
 830                .data           = (void *)&cap_last_cap,
 831                .maxlen         = sizeof(int),
 832                .mode           = 0444,
 833                .proc_handler   = proc_dointvec,
 834        },
 835#if defined(CONFIG_LOCKUP_DETECTOR)
 836        {
 837                .procname       = "watchdog",
 838                .data           = &watchdog_user_enabled,
 839                .maxlen         = sizeof (int),
 840                .mode           = 0644,
 841                .proc_handler   = proc_dowatchdog,
 842                .extra1         = &zero,
 843                .extra2         = &one,
 844        },
 845        {
 846                .procname       = "watchdog_thresh",
 847                .data           = &watchdog_thresh,
 848                .maxlen         = sizeof(int),
 849                .mode           = 0644,
 850                .proc_handler   = proc_dowatchdog,
 851                .extra1         = &zero,
 852                .extra2         = &sixty,
 853        },
 854        {
 855                .procname       = "softlockup_panic",
 856                .data           = &softlockup_panic,
 857                .maxlen         = sizeof(int),
 858                .mode           = 0644,
 859                .proc_handler   = proc_dointvec_minmax,
 860                .extra1         = &zero,
 861                .extra2         = &one,
 862        },
 863#ifdef CONFIG_SMP
 864        {
 865                .procname       = "softlockup_all_cpu_backtrace",
 866                .data           = &sysctl_softlockup_all_cpu_backtrace,
 867                .maxlen         = sizeof(int),
 868                .mode           = 0644,
 869                .proc_handler   = proc_dointvec_minmax,
 870                .extra1         = &zero,
 871                .extra2         = &one,
 872        },
 873#endif /* CONFIG_SMP */
 874        {
 875                .procname       = "nmi_watchdog",
 876                .data           = &watchdog_user_enabled,
 877                .maxlen         = sizeof (int),
 878                .mode           = 0644,
 879                .proc_handler   = proc_dowatchdog,
 880                .extra1         = &zero,
 881                .extra2         = &one,
 882        },
 883#endif
 884#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 885        {
 886                .procname       = "unknown_nmi_panic",
 887                .data           = &unknown_nmi_panic,
 888                .maxlen         = sizeof (int),
 889                .mode           = 0644,
 890                .proc_handler   = proc_dointvec,
 891        },
 892#endif
 893#if defined(CONFIG_X86)
 894        {
 895                .procname       = "panic_on_unrecovered_nmi",
 896                .data           = &panic_on_unrecovered_nmi,
 897                .maxlen         = sizeof(int),
 898                .mode           = 0644,
 899                .proc_handler   = proc_dointvec,
 900        },
 901        {
 902                .procname       = "panic_on_io_nmi",
 903                .data           = &panic_on_io_nmi,
 904                .maxlen         = sizeof(int),
 905                .mode           = 0644,
 906                .proc_handler   = proc_dointvec,
 907        },
 908#ifdef CONFIG_DEBUG_STACKOVERFLOW
 909        {
 910                .procname       = "panic_on_stackoverflow",
 911                .data           = &sysctl_panic_on_stackoverflow,
 912                .maxlen         = sizeof(int),
 913                .mode           = 0644,
 914                .proc_handler   = proc_dointvec,
 915        },
 916#endif
 917        {
 918                .procname       = "bootloader_type",
 919                .data           = &bootloader_type,
 920                .maxlen         = sizeof (int),
 921                .mode           = 0444,
 922                .proc_handler   = proc_dointvec,
 923        },
 924        {
 925                .procname       = "bootloader_version",
 926                .data           = &bootloader_version,
 927                .maxlen         = sizeof (int),
 928                .mode           = 0444,
 929                .proc_handler   = proc_dointvec,
 930        },
 931        {
 932                .procname       = "kstack_depth_to_print",
 933                .data           = &kstack_depth_to_print,
 934                .maxlen         = sizeof(int),
 935                .mode           = 0644,
 936                .proc_handler   = proc_dointvec,
 937        },
 938        {
 939                .procname       = "io_delay_type",
 940                .data           = &io_delay_type,
 941                .maxlen         = sizeof(int),
 942                .mode           = 0644,
 943                .proc_handler   = proc_dointvec,
 944        },
 945#endif
 946#if defined(CONFIG_MMU)
 947        {
 948                .procname       = "randomize_va_space",
 949                .data           = &randomize_va_space,
 950                .maxlen         = sizeof(int),
 951                .mode           = 0644,
 952                .proc_handler   = proc_dointvec,
 953        },
 954#endif
 955#if defined(CONFIG_S390) && defined(CONFIG_SMP)
 956        {
 957                .procname       = "spin_retry",
 958                .data           = &spin_retry,
 959                .maxlen         = sizeof (int),
 960                .mode           = 0644,
 961                .proc_handler   = proc_dointvec,
 962        },
 963#endif
 964#if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
 965        {
 966                .procname       = "acpi_video_flags",
 967                .data           = &acpi_realmode_flags,
 968                .maxlen         = sizeof (unsigned long),
 969                .mode           = 0644,
 970                .proc_handler   = proc_doulongvec_minmax,
 971        },
 972#endif
 973#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
 974        {
 975                .procname       = "ignore-unaligned-usertrap",
 976                .data           = &no_unaligned_warning,
 977                .maxlen         = sizeof (int),
 978                .mode           = 0644,
 979                .proc_handler   = proc_dointvec,
 980        },
 981#endif
 982#ifdef CONFIG_IA64
 983        {
 984                .procname       = "unaligned-dump-stack",
 985                .data           = &unaligned_dump_stack,
 986                .maxlen         = sizeof (int),
 987                .mode           = 0644,
 988                .proc_handler   = proc_dointvec,
 989        },
 990#endif
 991#ifdef CONFIG_DETECT_HUNG_TASK
 992        {
 993                .procname       = "hung_task_panic",
 994                .data           = &sysctl_hung_task_panic,
 995                .maxlen         = sizeof(int),
 996                .mode           = 0644,
 997                .proc_handler   = proc_dointvec_minmax,
 998                .extra1         = &zero,
 999                .extra2         = &one,
1000        },
1001        {
1002                .procname       = "hung_task_check_count",
1003                .data           = &sysctl_hung_task_check_count,
1004                .maxlen         = sizeof(int),
1005                .mode           = 0644,
1006                .proc_handler   = proc_dointvec_minmax,
1007                .extra1         = &zero,
1008        },
1009        {
1010                .procname       = "hung_task_timeout_secs",
1011                .data           = &sysctl_hung_task_timeout_secs,
1012                .maxlen         = sizeof(unsigned long),
1013                .mode           = 0644,
1014                .proc_handler   = proc_dohung_task_timeout_secs,
1015                .extra2         = &hung_task_timeout_max,
1016        },
1017        {
1018                .procname       = "hung_task_warnings",
1019                .data           = &sysctl_hung_task_warnings,
1020                .maxlen         = sizeof(int),
1021                .mode           = 0644,
1022                .proc_handler   = proc_dointvec_minmax,
1023                .extra1         = &neg_one,
1024        },
1025#endif
1026#ifdef CONFIG_COMPAT
1027        {
1028                .procname       = "compat-log",
1029                .data           = &compat_log,
1030                .maxlen         = sizeof (int),
1031                .mode           = 0644,
1032                .proc_handler   = proc_dointvec,
1033        },
1034#endif
1035#ifdef CONFIG_RT_MUTEXES
1036        {
1037                .procname       = "max_lock_depth",
1038                .data           = &max_lock_depth,
1039                .maxlen         = sizeof(int),
1040                .mode           = 0644,
1041                .proc_handler   = proc_dointvec,
1042        },
1043#endif
1044        {
1045                .procname       = "poweroff_cmd",
1046                .data           = &poweroff_cmd,
1047                .maxlen         = POWEROFF_CMD_PATH_LEN,
1048                .mode           = 0644,
1049                .proc_handler   = proc_dostring,
1050        },
1051#ifdef CONFIG_KEYS
1052        {
1053                .procname       = "keys",
1054                .mode           = 0555,
1055                .child          = key_sysctls,
1056        },
1057#endif
1058#ifdef CONFIG_RCU_TORTURE_TEST
1059        {
1060                .procname       = "rcutorture_runnable",
1061                .data           = &rcutorture_runnable,
1062                .maxlen         = sizeof(int),
1063                .mode           = 0644,
1064                .proc_handler   = proc_dointvec,
1065        },
1066#endif
1067#ifdef CONFIG_PERF_EVENTS
1068        /*
1069         * User-space scripts rely on the existence of this file
1070         * as a feature check for perf_events being enabled.
1071         *
1072         * So it's an ABI, do not remove!
1073         */
1074        {
1075                .procname       = "perf_event_paranoid",
1076                .data           = &sysctl_perf_event_paranoid,
1077                .maxlen         = sizeof(sysctl_perf_event_paranoid),
1078                .mode           = 0644,
1079                .proc_handler   = proc_dointvec,
1080        },
1081        {
1082                .procname       = "perf_event_mlock_kb",
1083                .data           = &sysctl_perf_event_mlock,
1084                .maxlen         = sizeof(sysctl_perf_event_mlock),
1085                .mode           = 0644,
1086                .proc_handler   = proc_dointvec,
1087        },
1088        {
1089                .procname       = "perf_event_max_sample_rate",
1090                .data           = &sysctl_perf_event_sample_rate,
1091                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1092                .mode           = 0644,
1093                .proc_handler   = perf_proc_update_handler,
1094                .extra1         = &one,
1095        },
1096        {
1097                .procname       = "perf_cpu_time_max_percent",
1098                .data           = &sysctl_perf_cpu_time_max_percent,
1099                .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1100                .mode           = 0644,
1101                .proc_handler   = perf_cpu_time_max_percent_handler,
1102                .extra1         = &zero,
1103                .extra2         = &one_hundred,
1104        },
1105#endif
1106#ifdef CONFIG_KMEMCHECK
1107        {
1108                .procname       = "kmemcheck",
1109                .data           = &kmemcheck_enabled,
1110                .maxlen         = sizeof(int),
1111                .mode           = 0644,
1112                .proc_handler   = proc_dointvec,
1113        },
1114#endif
1115        { }
1116};
1117
1118static struct ctl_table vm_table[] = {
1119        {
1120                .procname       = "overcommit_memory",
1121                .data           = &sysctl_overcommit_memory,
1122                .maxlen         = sizeof(sysctl_overcommit_memory),
1123                .mode           = 0644,
1124                .proc_handler   = proc_dointvec_minmax,
1125                .extra1         = &zero,
1126                .extra2         = &two,
1127        },
1128        {
1129                .procname       = "panic_on_oom",
1130                .data           = &sysctl_panic_on_oom,
1131                .maxlen         = sizeof(sysctl_panic_on_oom),
1132                .mode           = 0644,
1133                .proc_handler   = proc_dointvec_minmax,
1134                .extra1         = &zero,
1135                .extra2         = &two,
1136        },
1137        {
1138                .procname       = "oom_kill_allocating_task",
1139                .data           = &sysctl_oom_kill_allocating_task,
1140                .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1141                .mode           = 0644,
1142                .proc_handler   = proc_dointvec,
1143        },
1144        {
1145                .procname       = "oom_dump_tasks",
1146                .data           = &sysctl_oom_dump_tasks,
1147                .maxlen         = sizeof(sysctl_oom_dump_tasks),
1148                .mode           = 0644,
1149                .proc_handler   = proc_dointvec,
1150        },
1151        {
1152                .procname       = "overcommit_ratio",
1153                .data           = &sysctl_overcommit_ratio,
1154                .maxlen         = sizeof(sysctl_overcommit_ratio),
1155                .mode           = 0644,
1156                .proc_handler   = overcommit_ratio_handler,
1157        },
1158        {
1159                .procname       = "overcommit_kbytes",
1160                .data           = &sysctl_overcommit_kbytes,
1161                .maxlen         = sizeof(sysctl_overcommit_kbytes),
1162                .mode           = 0644,
1163                .proc_handler   = overcommit_kbytes_handler,
1164        },
1165        {
1166                .procname       = "page-cluster", 
1167                .data           = &page_cluster,
1168                .maxlen         = sizeof(int),
1169                .mode           = 0644,
1170                .proc_handler   = proc_dointvec_minmax,
1171                .extra1         = &zero,
1172        },
1173        {
1174                .procname       = "dirty_background_ratio",
1175                .data           = &dirty_background_ratio,
1176                .maxlen         = sizeof(dirty_background_ratio),
1177                .mode           = 0644,
1178                .proc_handler   = dirty_background_ratio_handler,
1179                .extra1         = &zero,
1180                .extra2         = &one_hundred,
1181        },
1182        {
1183                .procname       = "dirty_background_bytes",
1184                .data           = &dirty_background_bytes,
1185                .maxlen         = sizeof(dirty_background_bytes),
1186                .mode           = 0644,
1187                .proc_handler   = dirty_background_bytes_handler,
1188                .extra1         = &one_ul,
1189        },
1190        {
1191                .procname       = "dirty_ratio",
1192                .data           = &vm_dirty_ratio,
1193                .maxlen         = sizeof(vm_dirty_ratio),
1194                .mode           = 0644,
1195                .proc_handler   = dirty_ratio_handler,
1196                .extra1         = &zero,
1197                .extra2         = &one_hundred,
1198        },
1199        {
1200                .procname       = "dirty_bytes",
1201                .data           = &vm_dirty_bytes,
1202                .maxlen         = sizeof(vm_dirty_bytes),
1203                .mode           = 0644,
1204                .proc_handler   = dirty_bytes_handler,
1205                .extra1         = &dirty_bytes_min,
1206        },
1207        {
1208                .procname       = "dirty_writeback_centisecs",
1209                .data           = &dirty_writeback_interval,
1210                .maxlen         = sizeof(dirty_writeback_interval),
1211                .mode           = 0644,
1212                .proc_handler   = dirty_writeback_centisecs_handler,
1213        },
1214        {
1215                .procname       = "dirty_expire_centisecs",
1216                .data           = &dirty_expire_interval,
1217                .maxlen         = sizeof(dirty_expire_interval),
1218                .mode           = 0644,
1219                .proc_handler   = proc_dointvec_minmax,
1220                .extra1         = &zero,
1221        },
1222        {
1223                .procname       = "nr_pdflush_threads",
1224                .mode           = 0444 /* read-only */,
1225                .proc_handler   = pdflush_proc_obsolete,
1226        },
1227        {
1228                .procname       = "swappiness",
1229                .data           = &vm_swappiness,
1230                .maxlen         = sizeof(vm_swappiness),
1231                .mode           = 0644,
1232                .proc_handler   = proc_dointvec_minmax,
1233                .extra1         = &zero,
1234                .extra2         = &one_hundred,
1235        },
1236#ifdef CONFIG_HUGETLB_PAGE
1237        {
1238                .procname       = "nr_hugepages",
1239                .data           = NULL,
1240                .maxlen         = sizeof(unsigned long),
1241                .mode           = 0644,
1242                .proc_handler   = hugetlb_sysctl_handler,
1243                .extra1         = (void *)&hugetlb_zero,
1244                .extra2         = (void *)&hugetlb_infinity,
1245        },
1246#ifdef CONFIG_NUMA
1247        {
1248                .procname       = "nr_hugepages_mempolicy",
1249                .data           = NULL,
1250                .maxlen         = sizeof(unsigned long),
1251                .mode           = 0644,
1252                .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1253                .extra1         = (void *)&hugetlb_zero,
1254                .extra2         = (void *)&hugetlb_infinity,
1255        },
1256#endif
1257         {
1258                .procname       = "hugetlb_shm_group",
1259                .data           = &sysctl_hugetlb_shm_group,
1260                .maxlen         = sizeof(gid_t),
1261                .mode           = 0644,
1262                .proc_handler   = proc_dointvec,
1263         },
1264         {
1265                .procname       = "hugepages_treat_as_movable",
1266                .data           = &hugepages_treat_as_movable,
1267                .maxlen         = sizeof(int),
1268                .mode           = 0644,
1269                .proc_handler   = proc_dointvec,
1270        },
1271        {
1272                .procname       = "nr_overcommit_hugepages",
1273                .data           = NULL,
1274                .maxlen         = sizeof(unsigned long),
1275                .mode           = 0644,
1276                .proc_handler   = hugetlb_overcommit_handler,
1277                .extra1         = (void *)&hugetlb_zero,
1278                .extra2         = (void *)&hugetlb_infinity,
1279        },
1280#endif
1281        {
1282                .procname       = "lowmem_reserve_ratio",
1283                .data           = &sysctl_lowmem_reserve_ratio,
1284                .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1285                .mode           = 0644,
1286                .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1287        },
1288        {
1289                .procname       = "drop_caches",
1290                .data           = &sysctl_drop_caches,
1291                .maxlen         = sizeof(int),
1292                .mode           = 0644,
1293                .proc_handler   = drop_caches_sysctl_handler,
1294                .extra1         = &one,
1295                .extra2         = &four,
1296        },
1297#ifdef CONFIG_COMPACTION
1298        {
1299                .procname       = "compact_memory",
1300                .data           = &sysctl_compact_memory,
1301                .maxlen         = sizeof(int),
1302                .mode           = 0200,
1303                .proc_handler   = sysctl_compaction_handler,
1304        },
1305        {
1306                .procname       = "extfrag_threshold",
1307                .data           = &sysctl_extfrag_threshold,
1308                .maxlen         = sizeof(int),
1309                .mode           = 0644,
1310                .proc_handler   = sysctl_extfrag_handler,
1311                .extra1         = &min_extfrag_threshold,
1312                .extra2         = &max_extfrag_threshold,
1313        },
1314
1315#endif /* CONFIG_COMPACTION */
1316        {
1317                .procname       = "min_free_kbytes",
1318                .data           = &min_free_kbytes,
1319                .maxlen         = sizeof(min_free_kbytes),
1320                .mode           = 0644,
1321                .proc_handler   = min_free_kbytes_sysctl_handler,
1322                .extra1         = &zero,
1323        },
1324        {
1325                .procname       = "percpu_pagelist_fraction",
1326                .data           = &percpu_pagelist_fraction,
1327                .maxlen         = sizeof(percpu_pagelist_fraction),
1328                .mode           = 0644,
1329                .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1330                .extra1         = &zero,
1331        },
1332#ifdef CONFIG_MMU
1333        {
1334                .procname       = "max_map_count",
1335                .data           = &sysctl_max_map_count,
1336                .maxlen         = sizeof(sysctl_max_map_count),
1337                .mode           = 0644,
1338                .proc_handler   = proc_dointvec_minmax,
1339                .extra1         = &zero,
1340        },
1341#else
1342        {
1343                .procname       = "nr_trim_pages",
1344                .data           = &sysctl_nr_trim_pages,
1345                .maxlen         = sizeof(sysctl_nr_trim_pages),
1346                .mode           = 0644,
1347                .proc_handler   = proc_dointvec_minmax,
1348                .extra1         = &zero,
1349        },
1350#endif
1351        {
1352                .procname       = "laptop_mode",
1353                .data           = &laptop_mode,
1354                .maxlen         = sizeof(laptop_mode),
1355                .mode           = 0644,
1356                .proc_handler   = proc_dointvec_jiffies,
1357        },
1358        {
1359                .procname       = "block_dump",
1360                .data           = &block_dump,
1361                .maxlen         = sizeof(block_dump),
1362                .mode           = 0644,
1363                .proc_handler   = proc_dointvec,
1364                .extra1         = &zero,
1365        },
1366        {
1367                .procname       = "vfs_cache_pressure",
1368                .data           = &sysctl_vfs_cache_pressure,
1369                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1370                .mode           = 0644,
1371                .proc_handler   = proc_dointvec,
1372                .extra1         = &zero,
1373        },
1374#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1375        {
1376                .procname       = "legacy_va_layout",
1377                .data           = &sysctl_legacy_va_layout,
1378                .maxlen         = sizeof(sysctl_legacy_va_layout),
1379                .mode           = 0644,
1380                .proc_handler   = proc_dointvec,
1381                .extra1         = &zero,
1382        },
1383#endif
1384#ifdef CONFIG_NUMA
1385        {
1386                .procname       = "zone_reclaim_mode",
1387                .data           = &zone_reclaim_mode,
1388                .maxlen         = sizeof(zone_reclaim_mode),
1389                .mode           = 0644,
1390                .proc_handler   = proc_dointvec,
1391                .extra1         = &zero,
1392        },
1393        {
1394                .procname       = "min_unmapped_ratio",
1395                .data           = &sysctl_min_unmapped_ratio,
1396                .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1397                .mode           = 0644,
1398                .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1399                .extra1         = &zero,
1400                .extra2         = &one_hundred,
1401        },
1402        {
1403                .procname       = "min_slab_ratio",
1404                .data           = &sysctl_min_slab_ratio,
1405                .maxlen         = sizeof(sysctl_min_slab_ratio),
1406                .mode           = 0644,
1407                .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1408                .extra1         = &zero,
1409                .extra2         = &one_hundred,
1410        },
1411#endif
1412#ifdef CONFIG_SMP
1413        {
1414                .procname       = "stat_interval",
1415                .data           = &sysctl_stat_interval,
1416                .maxlen         = sizeof(sysctl_stat_interval),
1417                .mode           = 0644,
1418                .proc_handler   = proc_dointvec_jiffies,
1419        },
1420#endif
1421#ifdef CONFIG_MMU
1422        {
1423                .procname       = "mmap_min_addr",
1424                .data           = &dac_mmap_min_addr,
1425                .maxlen         = sizeof(unsigned long),
1426                .mode           = 0644,
1427                .proc_handler   = mmap_min_addr_handler,
1428        },
1429#endif
1430#ifdef CONFIG_NUMA
1431        {
1432                .procname       = "numa_zonelist_order",
1433                .data           = &numa_zonelist_order,
1434                .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1435                .mode           = 0644,
1436                .proc_handler   = numa_zonelist_order_handler,
1437        },
1438#endif
1439#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1440   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1441        {
1442                .procname       = "vdso_enabled",
1443#ifdef CONFIG_X86_32
1444                .data           = &vdso32_enabled,
1445                .maxlen         = sizeof(vdso32_enabled),
1446#else
1447                .data           = &vdso_enabled,
1448                .maxlen         = sizeof(vdso_enabled),
1449#endif
1450                .mode           = 0644,
1451                .proc_handler   = proc_dointvec,
1452                .extra1         = &zero,
1453        },
1454#endif
1455#ifdef CONFIG_HIGHMEM
1456        {
1457                .procname       = "highmem_is_dirtyable",
1458                .data           = &vm_highmem_is_dirtyable,
1459                .maxlen         = sizeof(vm_highmem_is_dirtyable),
1460                .mode           = 0644,
1461                .proc_handler   = proc_dointvec_minmax,
1462                .extra1         = &zero,
1463                .extra2         = &one,
1464        },
1465#endif
1466        {
1467                .procname       = "scan_unevictable_pages",
1468                .data           = &scan_unevictable_pages,
1469                .maxlen         = sizeof(scan_unevictable_pages),
1470                .mode           = 0644,
1471                .proc_handler   = scan_unevictable_handler,
1472        },
1473#ifdef CONFIG_MEMORY_FAILURE
1474        {
1475                .procname       = "memory_failure_early_kill",
1476                .data           = &sysctl_memory_failure_early_kill,
1477                .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1478                .mode           = 0644,
1479                .proc_handler   = proc_dointvec_minmax,
1480                .extra1         = &zero,
1481                .extra2         = &one,
1482        },
1483        {
1484                .procname       = "memory_failure_recovery",
1485                .data           = &sysctl_memory_failure_recovery,
1486                .maxlen         = sizeof(sysctl_memory_failure_recovery),
1487                .mode           = 0644,
1488                .proc_handler   = proc_dointvec_minmax,
1489                .extra1         = &zero,
1490                .extra2         = &one,
1491        },
1492#endif
1493        {
1494                .procname       = "user_reserve_kbytes",
1495                .data           = &sysctl_user_reserve_kbytes,
1496                .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1497                .mode           = 0644,
1498                .proc_handler   = proc_doulongvec_minmax,
1499        },
1500        {
1501                .procname       = "admin_reserve_kbytes",
1502                .data           = &sysctl_admin_reserve_kbytes,
1503                .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1504                .mode           = 0644,
1505                .proc_handler   = proc_doulongvec_minmax,
1506        },
1507        { }
1508};
1509
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Deliberately empty sysctl table: it holds nothing but the terminating
 * entry and is only compiled when binfmt_misc is built in or as a module.
 * NOTE(review): its user is outside this view; presumably it serves as a
 * placeholder directory -- confirm at the reference site.
 */
static struct ctl_table binfmt_misc_table[] = { { } };
#endif
1515
1516static struct ctl_table fs_table[] = {
1517        {
1518                .procname       = "inode-nr",
1519                .data           = &inodes_stat,
1520                .maxlen         = 2*sizeof(long),
1521                .mode           = 0444,
1522                .proc_handler   = proc_nr_inodes,
1523        },
1524        {
1525                .procname       = "inode-state",
1526                .data           = &inodes_stat,
1527                .maxlen         = 7*sizeof(long),
1528                .mode           = 0444,
1529                .proc_handler   = proc_nr_inodes,
1530        },
1531        {
1532                .procname       = "file-nr",
1533                .data           = &files_stat,
1534                .maxlen         = sizeof(files_stat),
1535                .mode           = 0444,
1536                .proc_handler   = proc_nr_files,
1537        },
1538        {
1539                .procname       = "file-max",
1540                .data           = &files_stat.max_files,
1541                .maxlen         = sizeof(files_stat.max_files),
1542                .mode           = 0644,
1543                .proc_handler   = proc_doulongvec_minmax,
1544        },
1545        {
1546                .procname       = "nr_open",
1547                .data           = &sysctl_nr_open,
1548                .maxlen         = sizeof(int),
1549                .mode           = 0644,
1550                .proc_handler   = proc_dointvec_minmax,
1551                .extra1         = &sysctl_nr_open_min,
1552                .extra2         = &sysctl_nr_open_max,
1553        },
1554        {
1555                .procname       = "dentry-state",
1556                .data           = &dentry_stat,
1557                .maxlen         = 6*sizeof(long),
1558                .mode           = 0444,
1559                .proc_handler   = proc_nr_dentry,
1560        },
1561        {
1562                .procname       = "overflowuid",
1563                .data           = &fs_overflowuid,
1564                .maxlen         = sizeof(int),
1565                .mode           = 0644,
1566                .proc_handler   = proc_dointvec_minmax,
1567                .extra1         = &minolduid,
1568                .extra2         = &maxolduid,
1569        },
1570        {
1571                .procname       = "overflowgid",
1572                .data           = &fs_overflowgid,
1573                .maxlen         = sizeof(int),
1574                .mode           = 0644,
1575                .proc_handler   = proc_dointvec_minmax,
1576                .extra1         = &minolduid,
1577                .extra2         = &maxolduid,
1578        },
1579#ifdef CONFIG_FILE_LOCKING
1580        {
1581                .procname       = "leases-enable",
1582                .data           = &leases_enable,
1583                .maxlen         = sizeof(int),
1584                .mode           = 0644,
1585                .proc_handler   = proc_dointvec,
1586        },
1587#endif
1588#ifdef CONFIG_DNOTIFY
1589        {
1590                .procname       = "dir-notify-enable",
1591                .data           = &dir_notify_enable,
1592                .maxlen         = sizeof(int),
1593                .mode           = 0644,
1594                .proc_handler   = proc_dointvec,
1595        },
1596#endif
1597#ifdef CONFIG_MMU
1598#ifdef CONFIG_FILE_LOCKING
1599        {
1600                .procname       = "lease-break-time",
1601                .data           = &lease_break_time,
1602                .maxlen         = sizeof(int),
1603                .mode           = 0644,
1604                .proc_handler   = proc_dointvec,
1605        },
1606#endif
1607#ifdef CONFIG_AIO
1608        {
1609                .procname       = "aio-nr",
1610                .data           = &aio_nr,
1611                .maxlen         = sizeof(aio_nr),
1612                .mode           = 0444,
1613                .proc_handler   = proc_doulongvec_minmax,
1614        },
1615        {
1616                .procname       = "aio-max-nr",
1617                .data           = &aio_max_nr,
1618                .maxlen         = sizeof(aio_max_nr),
1619                .mode           = 0644,
1620                .proc_handler   = proc_doulongvec_minmax,
1621        },
1622#endif /* CONFIG_AIO */
1623#ifdef CONFIG_INOTIFY_USER
1624        {
1625                .procname       = "inotify",
1626                .mode           = 0555,
1627                .child          = inotify_table,
1628        },
1629#endif  
1630#ifdef CONFIG_EPOLL
1631        {
1632                .procname       = "epoll",
1633                .mode           = 0555,
1634                .child          = epoll_table,
1635        },
1636#endif
1637#endif
1638        {
1639                .procname       = "protected_symlinks",
1640                .data           = &sysctl_protected_symlinks,
1641                .maxlen         = sizeof(int),
1642                .mode           = 0600,
1643                .proc_handler   = proc_dointvec_minmax,
1644                .extra1         = &zero,
1645                .extra2         = &one,
1646        },
1647        {
1648                .procname       = "protected_hardlinks",
1649                .data           = &sysctl_protected_hardlinks,
1650                .maxlen         = sizeof(int),
1651                .mode           = 0600,
1652                .proc_handler   = proc_dointvec_minmax,
1653                .extra1         = &zero,
1654                .extra2         = &one,
1655        },
1656        {
1657                .procname       = "suid_dumpable",
1658                .data           = &suid_dumpable,
1659                .maxlen         = sizeof(int),
1660                .mode           = 0644,
1661                .proc_handler   = proc_dointvec_minmax_coredump,
1662                .extra1         = &zero,
1663                .extra2         = &two,
1664        },
1665#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1666        {
1667                .procname       = "binfmt_misc",
1668                .mode           = 0555,
1669                .child          = binfmt_misc_table,
1670        },
1671#endif
1672        {
1673                .procname       = "pipe-max-size",
1674                .data           = &pipe_max_size,
1675                .maxlen         = sizeof(int),
1676                .mode           = 0644,
1677                .proc_handler   = &pipe_proc_fn,
1678                .extra1         = &pipe_min_size,
1679        },
1680        { }
1681};
1682
/*
 * Sysctl entries collected in debug_table. Each entry is only compiled in
 * when its config option is set, so the table may consist of nothing but
 * the terminating empty entry.
 * NOTE(review): presumably exposed as the "debug" directory by the base
 * table - confirm against sysctl_base_table.
 */
static struct ctl_table debug_table[] = {
#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
        {
                /* Plain int backed by show_unhandled_signals, no range check. */
                .procname       = "exception-trace",
                .data           = &show_unhandled_signals,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
#endif
#if defined(CONFIG_OPTPROBES)
        {
                /*
                 * Uses a dedicated handler; extra1/extra2 point at zero/one,
                 * presumably limiting accepted values to 0..1 - confirm in
                 * proc_kprobes_optimization_handler.
                 */
                .procname       = "kprobes-optimization",
                .data           = &sysctl_kprobes_optimization,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_kprobes_optimization_handler,
                .extra1         = &zero,
                .extra2         = &one,
        },
#endif
        { }     /* empty entry ends the table */
};
1706
/*
 * dev_table holds no entries of its own here: it contains only the empty
 * terminating entry.
 */
static struct ctl_table dev_table[] = {
        { }
};
1710
1711int __init sysctl_init(void)
1712{
1713        struct ctl_table_header *hdr;
1714
1715        hdr = register_sysctl_table(sysctl_base_table);
1716        kmemleak_not_leak(hdr);
1717        return 0;
1718}
1719
1720#endif /* CONFIG_SYSCTL */
1721
1722/*
1723 * /proc/sys support
1724 */
1725
1726#ifdef CONFIG_PROC_SYSCTL
1727
/*
 * Copy a string between the kernel buffer @data (capacity @maxlen bytes)
 * and the user buffer @buffer.
 *
 * @data:   kernel string storage
 * @maxlen: size of @data in bytes
 * @write:  non-zero to copy from user space into @data, zero to copy out
 * @buffer: user buffer
 * @lenp:   in: size of the user buffer; updated on the read path (and set
 *          to 0 on the early-exit paths below)
 * @ppos:   file position, advanced by this function
 *
 * Returns 0 on success or -EFAULT if a user access fails.
 */
static int _proc_do_string(char *data, int maxlen, int write,
                           char __user *buffer,
                           size_t *lenp, loff_t *ppos)
{
        size_t len;
        char __user *p;
        char c;

        /* Nothing to do without storage or without a user buffer. */
        if (!data || !maxlen || !*lenp) {
                *lenp = 0;
                return 0;
        }

        if (write) {
                if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
                        /* Only continue writes not past the end of buffer. */
                        len = strlen(data);
                        if (len > maxlen - 1)
                                len = maxlen - 1;

                        /* A position beyond the current string is ignored. */
                        if (*ppos > len)
                                return 0;
                        len = *ppos;
                } else {
                        /* Start writing from beginning of buffer. */
                        len = 0;
                }

                /* The position advances by the full request length. */
                *ppos += *lenp;
                p = buffer;
                /*
                 * Copy byte by byte until the user data is exhausted, the
                 * kernel buffer is full (one byte is kept for the
                 * terminator), or a NUL/newline is seen.
                 */
                while ((p - buffer) < *lenp && len < maxlen - 1) {
                        if (get_user(c, p++))
                                return -EFAULT;
                        if (c == 0 || c == '\n')
                                break;
                        data[len++] = c;
                }
                data[len] = 0;
        } else {
                len = strlen(data);
                if (len > maxlen)
                        len = maxlen;

                /* Reading past the end of the string yields no data. */
                if (*ppos > len) {
                        *lenp = 0;
                        return 0;
                }

                /* Continue from the current file position. */
                data += *ppos;
                len  -= *ppos;

                if (len > *lenp)
                        len = *lenp;
                if (len)
                        if (copy_to_user(buffer, data, len))
                                return -EFAULT;
                /* Append a newline when the user buffer still has room. */
                if (len < *lenp) {
                        if (put_user('\n', buffer + len))
                                return -EFAULT;
                        len++;
                }
                /* Report how many bytes were produced and advance. */
                *lenp = len;
                *ppos += len;
        }
        return 0;
}
1794
1795static void warn_sysctl_write(struct ctl_table *table)
1796{
1797        pr_warn_once("%s wrote to %s when file position was not 0!\n"
1798                "This will not be supported in the future. To silence this\n"
1799                "warning, set kernel.sysctl_writes_strict = -1\n",
1800                current->comm, table->procname);
1801}
1802
1803/**
1804 * proc_dostring - read a string sysctl
1805 * @table: the sysctl table
1806 * @write: %TRUE if this is a write to the sysctl file
1807 * @buffer: the user buffer
1808 * @lenp: the size of the user buffer
1809 * @ppos: file position
1810 *
1811 * Reads/writes a string from/to the user buffer. If the kernel
1812 * buffer provided is not large enough to hold the string, the
1813 * string is truncated. The copied string is %NULL-terminated.
1814 * If the string is being read by the user process, it is copied
1815 * and a newline '\n' is added. It is truncated if the buffer is
1816 * not large enough.
1817 *
1818 * Returns 0 on success.
1819 */
1820int proc_dostring(struct ctl_table *table, int write,
1821                  void __user *buffer, size_t *lenp, loff_t *ppos)
1822{
1823        if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
1824                warn_sysctl_write(table);
1825
1826        return _proc_do_string((char *)(table->data), table->maxlen, write,
1827                               (char __user *)buffer, lenp, ppos);
1828}
1829
1830static size_t proc_skip_spaces(char **buf)
1831{
1832        size_t ret;
1833        char *tmp = skip_spaces(*buf);
1834        ret = tmp - *buf;
1835        *buf = tmp;
1836        return ret;
1837}
1838
/*
 * Step over a leading run of the character @v, consuming at most *size
 * bytes. *buf and *size are updated to describe what remains.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        char *cursor = *buf;
        size_t remaining = *size;

        for (; remaining && *cursor == v; cursor++)
                remaining--;

        *buf = cursor;
        *size = remaining;
}
1848
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: pointer to the current position in a kernel buffer
 * @size: number of bytes remaining in that buffer
 * @val: this is where the magnitude of the number will be stored
 * @neg: set to %TRUE if the number is preceded by '-'
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes are examined. A number that fills all of
 * them is rejected, because the character following it cannot be seen.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 * -EINVAL is returned for empty input, input that does not start with a
 * digit (after an optional '-'), over-long numbers, and a trailer that is
 * not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
                          unsigned long *val, bool *neg,
                          const char *perm_tr, unsigned perm_tr_len, char *tr)
{
        /*
         * Keep the length in a size_t: narrowing *size to int would turn a
         * very large size into a negative length that slips past the
         * TMPBUFLEN clamp and is then handed to memcpy().
         */
        size_t len;
        char *p, tmp[TMPBUFLEN];

        if (!*size)
                return -EINVAL;

        len = *size;
        if (len > TMPBUFLEN - 1)
                len = TMPBUFLEN - 1;

        /* Work on a NUL-terminated private copy of the input window. */
        memcpy(tmp, *buf, len);

        tmp[len] = 0;
        p = tmp;
        /* A lone '-' is not treated as a sign. */
        if (*p == '-' && *size > 1) {
                *neg = true;
                p++;
        } else
                *neg = false;
        if (!isdigit(*p))
                return -EINVAL;

        *val = simple_strtoul(p, &p, 0);

        len = p - tmp;

        /* We don't know if the next char is whitespace thus we may accept
         * invalid integers (e.g. 1234...a) or two integers instead of one
         * (e.g. 123...1). So lets not allow such large numbers. */
        if (len == TMPBUFLEN - 1)
                return -EINVAL;

        /* Any character following the number must be a permitted trailer. */
        if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
                return -EINVAL;

        if (tr && (len < *size))
                *tr = *p;

        *buf += len;
        *size -= len;

        return 0;
}
1913
1914/**
1915 * proc_put_long - converts an integer to a decimal ASCII formatted string
1916 *
1917 * @buf: the user buffer
1918 * @size: the size of the user buffer
1919 * @val: the integer to be converted
1920 * @neg: sign of the number, %TRUE for negative
1921 *
1922 * In case of success %0 is returned and @buf and @size are updated with
1923 * the amount of bytes written.
1924 */
1925static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
1926                          bool neg)
1927{
1928        int len;
1929        char tmp[TMPBUFLEN], *p = tmp;
1930
1931        sprintf(p, "%s%lu", neg ? "-" : "", val);
1932        len = strlen(tmp);
1933        if (len > *size)
1934                len = *size;
1935        if (copy_to_user(*buf, tmp, len))
1936                return -EFAULT;
1937        *size -= len;
1938        *buf += len;
1939        return 0;
1940}
1941#undef TMPBUFLEN
1942
1943static int proc_put_char(void __user **buf, size_t *size, char c)
1944{
1945        if (*size) {
1946                char __user **buffer = (char __user **)buf;
1947                if (put_user(c, *buffer))
1948                        return -EFAULT;
1949                (*size)--, (*buffer)++;
1950                *buf = *buffer;
1951        }
1952        return 0;
1953}
1954
/*
 * Default conversion between the sign/magnitude pair used by the text
 * parser and a stored int.
 *
 * @negp:  sign flag (input on write, output on read)
 * @lvalp: magnitude (input on write, output on read)
 * @valp:  the stored int (output on write, input on read)
 * @write: non-zero when converting user input into *valp
 * @data:  unused
 *
 * Returns 0 on success, or -EINVAL on write when the value does not fit
 * in an int (previously such values were silently truncated).
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (write) {
                if (*negp) {
                        /* Largest magnitude of a negative int is INT_MAX + 1. */
                        if (*lvalp > (unsigned long) INT_MAX + 1)
                                return -EINVAL;
                        *valp = -*lvalp;
                } else {
                        if (*lvalp > (unsigned long) INT_MAX)
                                return -EINVAL;
                        *valp = *lvalp;
                }
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = true;
                        /*
                         * Negate after widening to unsigned: -val would
                         * overflow a signed int for INT_MIN.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = false;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
1973
/*
 * Characters accepted as the trailer after a number: space, tab, newline.
 * This is a plain char array (no terminating NUL); users pass
 * sizeof(proc_wspace_sep) as its length.
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
1975
/*
 * Common worker for the int vector handlers: converts between a
 * whitespace-separated ASCII list in the user buffer and the int array at
 * @tbl_data.
 *
 * @tbl_data: the int array (table->maxlen / sizeof(int) elements)
 * @table:    the sysctl table entry (used for maxlen and for warnings)
 * @write:    non-zero to parse user input into the array
 * @buffer:   the user buffer
 * @lenp:     in: size of the user buffer; out: bytes consumed/produced
 * @ppos:     file position; advanced by the final *lenp
 * @conv:     per-element conversion callback; do_proc_dointvec_conv is
 *            used when NULL. Any non-zero return is reported as -EINVAL.
 * @data:     opaque pointer handed to @conv
 *
 * Returns 0 on success, -ENOMEM, -EFAULT or -EINVAL on failure. A write
 * that did not store a single element returns -EINVAL.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
                  int write, void __user *buffer,
                  size_t *lenp, loff_t *ppos,
                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
{
        int *i, vleft, first = 1, err = 0;
        unsigned long page = 0;
        size_t left;
        char *kbuf;

        /* Reads at a non-zero position return no data (end of file). */
        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
                *lenp = 0;
                return 0;
        }

        i = (int *) tbl_data;
        vleft = table->maxlen / sizeof(*i);
        left = *lenp;

        if (!conv)
                conv = do_proc_dointvec_conv;

        if (write) {
                if (*ppos) {
                        switch (sysctl_writes_strict) {
                        case SYSCTL_WRITES_STRICT:
                                /* Skip parsing; only advance the position. */
                                goto out;
                        case SYSCTL_WRITES_WARN:
                                warn_sysctl_write(table);
                                break;
                        default:
                                break;
                        }
                }

                /* Input is limited to one page, keeping a byte for the NUL. */
                if (left > PAGE_SIZE - 1)
                        left = PAGE_SIZE - 1;
                page = __get_free_page(GFP_TEMPORARY);
                kbuf = (char *) page;
                if (!kbuf)
                        return -ENOMEM;
                if (copy_from_user(kbuf, buffer, left)) {
                        err = -EFAULT;
                        goto free;
                }
                kbuf[left] = 0;
        }

        /* One element per iteration; "first" stays set until one succeeds. */
        for (; left && vleft--; i++, first=0) {
                unsigned long lval;
                bool neg;

                if (write) {
                        left -= proc_skip_spaces(&kbuf);

                        /* Only whitespace remained: stop without error. */
                        if (!left)
                                break;
                        err = proc_get_long(&kbuf, &left, &lval, &neg,
                                             proc_wspace_sep,
                                             sizeof(proc_wspace_sep), NULL);
                        if (err)
                                break;
                        if (conv(&neg, &lval, i, 1, data)) {
                                err = -EINVAL;
                                break;
                        }
                } else {
                        if (conv(&neg, &lval, i, 0, data)) {
                                err = -EINVAL;
                                break;
                        }
                        /* Elements after the first are separated by a tab. */
                        if (!first)
                                err = proc_put_char(&buffer, &left, '\t');
                        if (err)
                                break;
                        err = proc_put_long(&buffer, &left, lval, neg);
                        if (err)
                                break;
                }
        }

        /* Terminate read output with a newline when there is room. */
        if (!write && !first && left && !err)
                err = proc_put_char(&buffer, &left, '\n');
        /* Consume trailing whitespace of the written input. */
        if (write && !err && left)
                left -= proc_skip_spaces(&kbuf);
free:
        if (write) {
                free_page(page);
                /* Nothing was stored: report the error, or -EINVAL. */
                if (first)
                        return err ? : -EINVAL;
        }
        /* Bytes actually consumed (write) or produced (read). */
        *lenp -= left;
out:
        *ppos += *lenp;
        return err;
}
2074
2075static int do_proc_dointvec(struct ctl_table *table, int write,
2076                  void __user *buffer, size_t *lenp, loff_t *ppos,
2077                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2078                              int write, void *data),
2079                  void *data)
2080{
2081        return __do_proc_dointvec(table->data, table, write,
2082                        buffer, lenp, ppos, conv, data);
2083}
2084
2085/**
2086 * proc_dointvec - read a vector of integers
2087 * @table: the sysctl table
2088 * @write: %TRUE if this is a write to the sysctl file
2089 * @buffer: the user buffer
2090 * @lenp: the size of the user buffer
2091 * @ppos: file position
2092 *
2093 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2094 * values from/to the user buffer, treated as an ASCII string. 
2095 *
2096 * Returns 0 on success.
2097 */
2098int proc_dointvec(struct ctl_table *table, int write,
2099                     void __user *buffer, size_t *lenp, loff_t *ppos)
2100{
2101    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2102                            NULL,NULL);
2103}
2104
2105/*
2106 * Taint values can only be increased
2107 * This means we can safely use a temporary.
2108 */
2109static int proc_taint(struct ctl_table *table, int write,
2110                               void __user *buffer, size_t *lenp, loff_t *ppos)
2111{
2112        struct ctl_table t;
2113        unsigned long tmptaint = get_taint();
2114        int err;
2115
2116        if (write && !capable(CAP_SYS_ADMIN))
2117                return -EPERM;
2118
2119        t = *table;
2120        t.data = &tmptaint;
2121        err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2122        if (err < 0)
2123                return err;
2124
2125        if (write) {
2126                /*
2127                 * Poor man's atomic or. Not worth adding a primitive
2128                 * to everyone's atomic.h for this
2129                 */
2130                int i;
2131                for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2132                        if ((tmptaint >> i) & 1)
2133                                add_taint(i, LOCKDEP_STILL_OK);
2134                }
2135        }
2136
2137        return err;
2138}
2139
#ifdef CONFIG_PRINTK
/*
 * Same as proc_dointvec_minmax(), except that writes are refused with
 * -EPERM unless the caller has CAP_SYS_ADMIN. Reads are not restricted.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
{
        if (!write || capable(CAP_SYS_ADMIN))
                return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

        return -EPERM;
}
#endif
2150
/*
 * Optional bounds for do_proc_dointvec_minmax_conv(); a NULL pointer means
 * "no limit on that side".
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Conversion callback that additionally enforces the range given by @data
 * (a struct do_proc_dointvec_minmax_conv_param) on writes.
 *
 * @negp:  sign flag (input on write, output on read)
 * @lvalp: magnitude (input on write, output on read)
 * @valp:  the stored int (output on write, input on read)
 * @write: non-zero when converting user input into *valp
 * @data:  the min/max parameter block
 *
 * Returns 0 on success. On write, -EINVAL is returned and *valp is left
 * untouched when the value does not fit in an int or lies outside the
 * permitted range. The int-range check comes first so that a truncated
 * value cannot wrap around into the permitted range.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
                                        int *valp,
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;
        if (write) {
                int val;

                if (*negp) {
                        /* Largest magnitude of a negative int is INT_MAX + 1. */
                        if (*lvalp > (unsigned long) INT_MAX + 1)
                                return -EINVAL;
                        val = -*lvalp;
                } else {
                        if (*lvalp > (unsigned long) INT_MAX)
                                return -EINVAL;
                        val = *lvalp;
                }
                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = true;
                        /*
                         * Negate after widening to unsigned: -val would
                         * overflow a signed int for INT_MIN.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = false;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2179
2180/**
2181 * proc_dointvec_minmax - read a vector of integers with min/max values
2182 * @table: the sysctl table
2183 * @write: %TRUE if this is a write to the sysctl file
2184 * @buffer: the user buffer
2185 * @lenp: the size of the user buffer
2186 * @ppos: file position
2187 *
2188 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2189 * values from/to the user buffer, treated as an ASCII string.
2190 *
2191 * This routine will ensure the values are within the range specified by
2192 * table->extra1 (min) and table->extra2 (max).
2193 *
2194 * Returns 0 on success.
2195 */
2196int proc_dointvec_minmax(struct ctl_table *table, int write,
2197                  void __user *buffer, size_t *lenp, loff_t *ppos)
2198{
2199        struct do_proc_dointvec_minmax_conv_param param = {
2200                .min = (int *) table->extra1,
2201                .max = (int *) table->extra2,
2202        };
2203        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2204                                do_proc_dointvec_minmax_conv, &param);
2205}
2206
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'. Compiles to nothing without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING "Unsafe core_pattern used with "\
                "suid_dumpable=2. Pipe handler or fully qualified "\
                "core dump path required.\n");
#endif
}
2218
2219static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2220                void __user *buffer, size_t *lenp, loff_t *ppos)
2221{
2222        int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2223        if (!error)
2224                validate_coredump_safety();
2225        return error;
2226}
2227
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, on success, by a re-check of the coredump
 * configuration.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int error;

        error = proc_dostring(table, write, buffer, lenp, ppos);
        if (error)
                return error;

        validate_coredump_safety();
        return 0;
}
#endif
2238
2239static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2240                                     void __user *buffer,
2241                                     size_t *lenp, loff_t *ppos,
2242                                     unsigned long convmul,
2243                                     unsigned long convdiv)
2244{
2245        unsigned long *i, *min, *max;
2246        int vleft, first = 1, err = 0;
2247        unsigned long page = 0;
2248        size_t left;
2249        char *kbuf;
2250
2251        if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2252                *lenp = 0;
2253                return 0;
2254        }
2255
2256        i = (unsigned long *) data;
2257        min = (unsigned long *) table->extra1;
2258        max = (unsigned long *) table->extra2;
2259        vleft = table->maxlen / sizeof(unsigned long);
2260        left = *lenp;
2261
2262        if (write) {
2263                if (*ppos) {
2264                        switch (sysctl_writes_strict) {
2265                        case SYSCTL_WRITES_STRICT:
2266                                goto out;
2267                        case SYSCTL_WRITES_WARN:
2268                                warn_sysctl_write(table);
2269                                break;
2270                        default:
2271                                break;
2272                        }
2273                }
2274
2275                if (left > PAGE_SIZE - 1)
2276                        left = PAGE_SIZE - 1;
2277                page = __get_free_page(GFP_TEMPORARY);
2278                kbuf = (char *) page;
2279                if (!kbuf)
2280                        return -ENOMEM;
2281                if (copy_from_user(kbuf, buffer, left)) {
2282                        err = -EFAULT;
2283                        goto free;
2284                }
2285                kbuf[left] = 0;
2286        }
2287
2288        for (; left && vleft--; i++, first = 0) {
2289                unsigned long val;
2290
2291                if (write) {
2292                        bool neg;
2293
2294                        left -= proc_skip_spaces(&kbuf);
2295
2296                        err = proc_get_long(&kbuf, &left, &val, &neg,
2297                                             proc_wspace_sep,
2298                                             sizeof(proc_wspace_sep), NULL);
2299                        if (err)
2300                                break;
2301                        if (neg)
2302                                continue;
2303                        if ((min && val < *min) || (max && val > *max))
2304                                continue;
2305                        *i = val;
2306                } else {
2307                        val = convdiv * (*i) / convmul;
2308                        if (!first) {
2309                                err = proc_put_char(&buffer, &left, '\t');
2310                                if (err)
2311                                        break;
2312                        }
2313                        err = proc_put_long(&buffer, &left, val, false);
2314                        if (err)
2315                                break;
2316                }
2317        }
2318
2319        if (!write && !first && left && !err)
2320                err = proc_put_char(&buffer, &left, '\n');
2321        if (write && !err)
2322                left -= proc_skip_spaces(&kbuf);
2323free:
2324        if (write) {
2325                free_page(page);
2326                if (first)
2327                        return err ? : -EINVAL;
2328        }
2329        *lenp -= left;
2330out:
2331        *ppos += *lenp;
2332        return err;
2333}
2334
2335static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2336                                     void __user *buffer,
2337                                     size_t *lenp, loff_t *ppos,
2338                                     unsigned long convmul,
2339                                     unsigned long convdiv)
2340{
2341        return __do_proc_doulongvec_minmax(table->data, table, write,
2342                        buffer, lenp, ppos, convmul, convdiv);
2343}
2344
2345/**
2346 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2347 * @table: the sysctl table
2348 * @write: %TRUE if this is a write to the sysctl file
2349 * @buffer: the user buffer
2350 * @lenp: the size of the user buffer
2351 * @ppos: file position
2352 *
2353 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2354 * values from/to the user buffer, treated as an ASCII string.
2355 *
2356 * This routine will ensure the values are within the range specified by
2357 * table->extra1 (min) and table->extra2 (max).
2358 *
2359 * Returns 0 on success.
2360 */
2361int proc_doulongvec_minmax(struct ctl_table *table, int write,
2362                           void __user *buffer, size_t *lenp, loff_t *ppos)
2363{
2364    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2365}
2366
2367/**
2368 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2369 * @table: the sysctl table
2370 * @write: %TRUE if this is a write to the sysctl file
2371 * @buffer: the user buffer
2372 * @lenp: the size of the user buffer
2373 * @ppos: file position
2374 *
2375 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2376 * values from/to the user buffer, treated as an ASCII string. The values
2377 * are treated as milliseconds, and converted to jiffies when they are stored.
2378 *
2379 * This routine will ensure the values are within the range specified by
2380 * table->extra1 (min) and table->extra2 (max).
2381 *
2382 * Returns 0 on success.
2383 */
2384int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2385                                      void __user *buffer,
2386                                      size_t *lenp, loff_t *ppos)
2387{
2388    return do_proc_doulongvec_minmax(table, write, buffer,
2389                                     lenp, ppos, HZ, 1000l);
2390}
2391
2392
2393static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2394                                         int *valp,
2395                                         int write, void *data)
2396{
2397        if (write) {
2398                if (*lvalp > LONG_MAX / HZ)
2399                        return 1;
2400                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2401        } else {
2402                int val = *valp;
2403                unsigned long lval;
2404                if (val < 0) {
2405                        *negp = true;
2406                        lval = (unsigned long)-val;
2407                } else {
2408                        *negp = false;
2409                        lval = (unsigned long)val;
2410                }
2411                *lvalp = lval / HZ;
2412        }
2413        return 0;
2414}
2415
2416static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2417                                                int *valp,
2418                                                int write, void *data)
2419{
2420        if (write) {
2421                if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2422                        return 1;
2423                *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2424        } else {
2425                int val = *valp;
2426                unsigned long lval;
2427                if (val < 0) {
2428                        *negp = true;
2429                        lval = (unsigned long)-val;
2430                } else {
2431                        *negp = false;
2432                        lval = (unsigned long)val;
2433                }
2434                *lvalp = jiffies_to_clock_t(lval);
2435        }
2436        return 0;
2437}
2438
/*
 * Conversion callback between milliseconds (user side) and jiffies stored
 * in an int, using msecs_to_jiffies() and jiffies_to_msecs().
 *
 * @negp:  sign flag (input on write, output on read)
 * @lvalp: magnitude in milliseconds (input on write, output on read)
 * @valp:  the stored jiffies value (output on write, input on read)
 * @write: non-zero when converting user input into *valp
 * @data:  unused
 *
 * Returns 0 on success, or 1 on write when the converted value exceeds
 * INT_MAX.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

                if (jif > INT_MAX)
                        return 1;
                *valp = (int)jif;
        } else {
                int val = *valp;
                unsigned long lval;
                if (val < 0) {
                        *negp = true;
                        /*
                         * Negate after widening to unsigned: -val would
                         * overflow a signed int for INT_MIN.
                         */
                        lval = -(unsigned long)val;
                } else {
                        *negp = false;
                        lval = (unsigned long)val;
                }
                *lvalp = jiffies_to_msecs(lval);
        }
        return 0;
}
2463
2464/**
2465 * proc_dointvec_jiffies - read a vector of integers as seconds
2466 * @table: the sysctl table
2467 * @write: %TRUE if this is a write to the sysctl file
2468 * @buffer: the user buffer
2469 * @lenp: the size of the user buffer
2470 * @ppos: file position
2471 *
2472 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2473 * values from/to the user buffer, treated as an ASCII string. 
2474 * The values read are assumed to be in seconds, and are converted into
2475 * jiffies.
2476 *
2477 * Returns 0 on success.
2478 */
2479int proc_dointvec_jiffies(struct ctl_table *table, int write,
2480                          void __user *buffer, size_t *lenp, loff_t *ppos)
2481{
2482    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2483                            do_proc_dointvec_jiffies_conv,NULL);
2484}
2485
2486/**
2487 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2488 * @table: the sysctl table
2489 * @write: %TRUE if this is a write to the sysctl file
2490 * @buffer: the user buffer
2491 * @lenp: the size of the user buffer
2492 * @ppos: pointer to the file position
2493 *
2494 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2495 * values from/to the user buffer, treated as an ASCII string. 
2496 * The values read are assumed to be in 1/USER_HZ seconds, and 
2497 * are converted into jiffies.
2498 *
2499 * Returns 0 on success.
2500 */
2501int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2502                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2503{
2504    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2505                            do_proc_dointvec_userhz_jiffies_conv,NULL);
2506}
2507
2508/**
2509 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2510 * @table: the sysctl table
2511 * @write: %TRUE if this is a write to the sysctl file
2512 * @buffer: the user buffer
2513 * @lenp: the size of the user buffer
2514 * @ppos: file position
2515 * @ppos: the current position in the file
2516 *
2517 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2518 * values from/to the user buffer, treated as an ASCII string. 
2519 * The values read are assumed to be in 1/1000 seconds, and 
2520 * are converted into jiffies.
2521 *
2522 * Returns 0 on success.
2523 */
2524int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2525                             void __user *buffer, size_t *lenp, loff_t *ppos)
2526{
2527        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2528                                do_proc_dointvec_ms_jiffies_conv, NULL);
2529}
2530
2531static int proc_do_cad_pid(struct ctl_table *table, int write,
2532                           void __user *buffer, size_t *lenp, loff_t *ppos)
2533{
2534        struct pid *new_pid;
2535        pid_t tmp;
2536        int r;
2537
2538        tmp = pid_vnr(cad_pid);
2539
2540        r = __do_proc_dointvec(&tmp, table, write, buffer,
2541                               lenp, ppos, NULL, NULL);
2542        if (r || !write)
2543                return r;
2544
2545        new_pid = find_get_pid(tmp);
2546        if (!new_pid)
2547                return -ESRCH;
2548
2549        put_pid(xchg(&cad_pid, new_pid));
2550        return 0;
2551}
2552
2553/**
2554 * proc_do_large_bitmap - read/write from/to a large bitmap
2555 * @table: the sysctl table
2556 * @write: %TRUE if this is a write to the sysctl file
2557 * @buffer: the user buffer
2558 * @lenp: the size of the user buffer
2559 * @ppos: file position
2560 *
2561 * The bitmap is stored at table->data and the bitmap length (in bits)
2562 * in table->maxlen.
2563 *
2564 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2565 * large bitmaps may be represented in a compact manner. Writing into
2566 * the file will clear the bitmap then update it with the given input.
2567 *
2568 * Returns 0 on success.
2569 */
2570int proc_do_large_bitmap(struct ctl_table *table, int write,
2571                         void __user *buffer, size_t *lenp, loff_t *ppos)
2572{
2573        int err = 0;
2574        bool first = 1;
2575        size_t left = *lenp;
2576        unsigned long bitmap_len = table->maxlen;
2577        unsigned long *bitmap = *(unsigned long **) table->data;
2578        unsigned long *tmp_bitmap = NULL;
2579        char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2580
2581        if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
2582                *lenp = 0;
2583                return 0;
2584        }
2585
2586        if (write) {
2587                unsigned long page = 0;
2588                char *kbuf;
2589
2590                if (left > PAGE_SIZE - 1)
2591                        left = PAGE_SIZE - 1;
2592
2593                page = __get_free_page(GFP_TEMPORARY);
2594                kbuf = (char *) page;
2595                if (!kbuf)
2596                        return -ENOMEM;
2597                if (copy_from_user(kbuf, buffer, left)) {
2598                        free_page(page);
2599                        return -EFAULT;
2600                }
2601                kbuf[left] = 0;
2602
2603                tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2604                                     GFP_KERNEL);
2605                if (!tmp_bitmap) {
2606                        free_page(page);
2607                        return -ENOMEM;
2608                }
2609                proc_skip_char(&kbuf, &left, '\n');
2610                while (!err && left) {
2611                        unsigned long val_a, val_b;
2612                        bool neg;
2613
2614                        err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2615                                             sizeof(tr_a), &c);
2616                        if (err)
2617                                break;
2618                        if (val_a >= bitmap_len || neg) {
2619                                err = -EINVAL;
2620                                break;
2621                        }
2622
2623                        val_b = val_a;
2624                        if (left) {
2625                                kbuf++;
2626                                left--;
2627                        }
2628
2629                        if (c == '-') {
2630                                err = proc_get_long(&kbuf, &left, &val_b,
2631                                                     &neg, tr_b, sizeof(tr_b),
2632                                                     &c);
2633                                if (err)
2634                                        break;
2635                                if (val_b >= bitmap_len || neg ||
2636                                    val_a > val_b) {
2637                                        err = -EINVAL;
2638                                        break;
2639                                }
2640                                if (left) {
2641                                        kbuf++;
2642                                        left--;
2643                                }
2644                        }
2645
2646                        bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
2647                        first = 0;
2648                        proc_skip_char(&kbuf, &left, '\n');
2649                }
2650                free_page(page);
2651        } else {
2652                unsigned long bit_a, bit_b = 0;
2653
2654                while (left) {
2655                        bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2656                        if (bit_a >= bitmap_len)
2657                                break;
2658                        bit_b = find_next_zero_bit(bitmap, bitmap_len,
2659                                                   bit_a + 1) - 1;
2660
2661                        if (!first) {
2662                                err = proc_put_char(&buffer, &left, ',');
2663                                if (err)
2664                                        break;
2665                        }
2666                        err = proc_put_long(&buffer, &left, bit_a, false);
2667                        if (err)
2668                                break;
2669                        if (bit_a != bit_b) {
2670                                err = proc_put_char(&buffer, &left, '-');
2671                                if (err)
2672                                        break;
2673                                err = proc_put_long(&buffer, &left, bit_b, false);
2674                                if (err)
2675                                        break;
2676                        }
2677
2678                        first = 0; bit_b++;
2679                }
2680                if (!err)
2681                        err = proc_put_char(&buffer, &left, '\n');
2682        }
2683
2684        if (!err) {
2685                if (write) {
2686                        if (*ppos)
2687                                bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2688                        else
2689                                bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
2690                }
2691                kfree(tmp_bitmap);
2692                *lenp -= left;
2693                *ppos += *lenp;
2694                return 0;
2695        } else {
2696                kfree(tmp_bitmap);
2697                return err;
2698        }
2699}
2700
2701#else /* CONFIG_PROC_SYSCTL */
2702
2703int proc_dostring(struct ctl_table *table, int write,
2704                  void __user *buffer, size_t *lenp, loff_t *ppos)
2705{
2706        return -ENOSYS;
2707}
2708
2709int proc_dointvec(struct ctl_table *table, int write,
2710                  void __user *buffer, size_t *lenp, loff_t *ppos)
2711{
2712        return -ENOSYS;
2713}
2714
2715int proc_dointvec_minmax(struct ctl_table *table, int write,
2716                    void __user *buffer, size_t *lenp, loff_t *ppos)
2717{
2718        return -ENOSYS;
2719}
2720
2721int proc_dointvec_jiffies(struct ctl_table *table, int write,
2722                    void __user *buffer, size_t *lenp, loff_t *ppos)
2723{
2724        return -ENOSYS;
2725}
2726
2727int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2728                    void __user *buffer, size_t *lenp, loff_t *ppos)
2729{
2730        return -ENOSYS;
2731}
2732
2733int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2734                             void __user *buffer, size_t *lenp, loff_t *ppos)
2735{
2736        return -ENOSYS;
2737}
2738
2739int proc_doulongvec_minmax(struct ctl_table *table, int write,
2740                    void __user *buffer, size_t *lenp, loff_t *ppos)
2741{
2742        return -ENOSYS;
2743}
2744
2745int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2746                                      void __user *buffer,
2747                                      size_t *lenp, loff_t *ppos)
2748{
2749    return -ENOSYS;
2750}
2751
2752
2753#endif /* CONFIG_PROC_SYSCTL */
2754
/*
 * The exports are grouped here rather than placed after each definition:
 * every handler is defined on both sides of the CONFIG_PROC_SYSCTL
 * conditional, so per-definition exports would have to be written twice.
 * Exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2767