linux/kernel/sysctl.c
<<
>>
Prefs
   1/*
   2 * sysctl.c: General linux system control interface
   3 *
   4 * Begun 24 March 1995, Stephen Tweedie
   5 * Added /proc support, Dec 1995
   6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
   9 * Dynamic registration fixes, Stephen Tweedie.
  10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  12 *  Horn.
  13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  16 *  Wendling.
  17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  18 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/aio.h>
  23#include <linux/mm.h>
  24#include <linux/swap.h>
  25#include <linux/slab.h>
  26#include <linux/sysctl.h>
  27#include <linux/bitmap.h>
  28#include <linux/signal.h>
  29#include <linux/printk.h>
  30#include <linux/proc_fs.h>
  31#include <linux/security.h>
  32#include <linux/ctype.h>
  33#include <linux/kmemleak.h>
  34#include <linux/fs.h>
  35#include <linux/init.h>
  36#include <linux/kernel.h>
  37#include <linux/kobject.h>
  38#include <linux/net.h>
  39#include <linux/sysrq.h>
  40#include <linux/highuid.h>
  41#include <linux/writeback.h>
  42#include <linux/ratelimit.h>
  43#include <linux/compaction.h>
  44#include <linux/hugetlb.h>
  45#include <linux/initrd.h>
  46#include <linux/key.h>
  47#include <linux/times.h>
  48#include <linux/limits.h>
  49#include <linux/dcache.h>
  50#include <linux/dnotify.h>
  51#include <linux/syscalls.h>
  52#include <linux/vmstat.h>
  53#include <linux/nfs_fs.h>
  54#include <linux/acpi.h>
  55#include <linux/reboot.h>
  56#include <linux/ftrace.h>
  57#include <linux/perf_event.h>
  58#include <linux/kprobes.h>
  59#include <linux/pipe_fs_i.h>
  60#include <linux/oom.h>
  61#include <linux/kmod.h>
  62#include <linux/capability.h>
  63#include <linux/binfmts.h>
  64#include <linux/sched/sysctl.h>
  65#include <linux/sched/coredump.h>
  66#include <linux/kexec.h>
  67#include <linux/bpf.h>
  68#include <linux/mount.h>
  69#include <linux/pipe_fs_i.h>
  70
  71#include <linux/uaccess.h>
  72#include <asm/processor.h>
  73
  74#ifdef CONFIG_X86
  75#include <asm/nmi.h>
  76#include <asm/stacktrace.h>
  77#include <asm/io.h>
  78#endif
  79#ifdef CONFIG_SPARC
  80#include <asm/setup.h>
  81#endif
  82#ifdef CONFIG_BSD_PROCESS_ACCT
  83#include <linux/acct.h>
  84#endif
  85#ifdef CONFIG_RT_MUTEXES
  86#include <linux/rtmutex.h>
  87#endif
  88#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
  89#include <linux/lockdep.h>
  90#endif
  91#ifdef CONFIG_CHR_DEV_SG
  92#include <scsi/sg.h>
  93#endif
  94
  95#ifdef CONFIG_LOCKUP_DETECTOR
  96#include <linux/nmi.h>
  97#endif
  98
  99#if defined(CONFIG_SYSCTL)
 100
 101/* External variables not in a header file. */
 102extern int suid_dumpable;
 103#ifdef CONFIG_COREDUMP
 104extern int core_uses_pid;
 105extern char core_pattern[];
 106extern unsigned int core_pipe_limit;
 107#endif
 108extern int pid_max;
 109extern int pid_max_min, pid_max_max;
 110extern int percpu_pagelist_fraction;
 111extern int latencytop_enabled;
 112extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
 113#ifndef CONFIG_MMU
 114extern int sysctl_nr_trim_pages;
 115#endif
 116
 117/* Constants used for minimum and  maximum */
 118#ifdef CONFIG_LOCKUP_DETECTOR
 119static int sixty = 60;
 120#endif
 121
 122static int __maybe_unused neg_one = -1;
 123
 124static int zero;
 125static int __maybe_unused one = 1;
 126static int __maybe_unused two = 2;
 127static int __maybe_unused four = 4;
 128static unsigned long one_ul = 1;
 129static int one_hundred = 100;
 130static int one_thousand = 1000;
 131#ifdef CONFIG_PRINTK
 132static int ten_thousand = 10000;
 133#endif
 134#ifdef CONFIG_PERF_EVENTS
 135static int six_hundred_forty_kb = 640 * 1024;
 136#endif
 137
 138/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 139static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 140
 141/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 142static int maxolduid = 65535;
 143static int minolduid;
 144
 145static int ngroups_max = NGROUPS_MAX;
 146static const int cap_last_cap = CAP_LAST_CAP;
 147
 148/*
 149 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 150 * and hung_task_check_interval_secs
 151 */
 152#ifdef CONFIG_DETECT_HUNG_TASK
 153static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 154#endif
 155
 156#ifdef CONFIG_INOTIFY_USER
 157#include <linux/inotify.h>
 158#endif
 159#ifdef CONFIG_SPARC
 160#endif
 161
 162#ifdef __hppa__
 163extern int pwrsw_enabled;
 164#endif
 165
 166#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 167extern int unaligned_enabled;
 168#endif
 169
 170#ifdef CONFIG_IA64
 171extern int unaligned_dump_stack;
 172#endif
 173
 174#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
 175extern int no_unaligned_warning;
 176#endif
 177
 178#ifdef CONFIG_PROC_SYSCTL
 179
 180/**
 181 * enum sysctl_writes_mode - supported sysctl write modes
 182 *
 183 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 184 *      to be written, and multiple writes on the same sysctl file descriptor
 185 *      will rewrite the sysctl value, regardless of file position. No warning
 186 *      is issued when the initial position is not 0.
 187 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 188 *      not 0.
 189 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 190 *      file position 0 and the value must be fully contained in the buffer
 191 *      sent to the write syscall. If dealing with strings respect the file
 192 *      position, but restrict this to the max length of the buffer, anything
 193 *      passed the max lenght will be ignored. Multiple writes will append
 194 *      to the buffer.
 195 *
 196 * These write modes control how current file position affects the behavior of
 197 * updating sysctl values through the proc interface on each write.
 198 */
 199enum sysctl_writes_mode {
 200        SYSCTL_WRITES_LEGACY            = -1,
 201        SYSCTL_WRITES_WARN              = 0,
 202        SYSCTL_WRITES_STRICT            = 1,
 203};
 204
 205static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 206
 207static int proc_do_cad_pid(struct ctl_table *table, int write,
 208                  void __user *buffer, size_t *lenp, loff_t *ppos);
 209static int proc_taint(struct ctl_table *table, int write,
 210                               void __user *buffer, size_t *lenp, loff_t *ppos);
 211#endif
 212
 213#ifdef CONFIG_PRINTK
 214static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 215                                void __user *buffer, size_t *lenp, loff_t *ppos);
 216#endif
 217
 218static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
 219                void __user *buffer, size_t *lenp, loff_t *ppos);
 220#ifdef CONFIG_COREDUMP
 221static int proc_dostring_coredump(struct ctl_table *table, int write,
 222                void __user *buffer, size_t *lenp, loff_t *ppos);
 223#endif
 224static int proc_dopipe_max_size(struct ctl_table *table, int write,
 225                void __user *buffer, size_t *lenp, loff_t *ppos);
 226
 227#ifdef CONFIG_MAGIC_SYSRQ
 228/* Note: sysrq code uses its own private copy */
 229static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 230
 231static int sysrq_sysctl_handler(struct ctl_table *table, int write,
 232                                void __user *buffer, size_t *lenp,
 233                                loff_t *ppos)
 234{
 235        int error;
 236
 237        error = proc_dointvec(table, write, buffer, lenp, ppos);
 238        if (error)
 239                return error;
 240
 241        if (write)
 242                sysrq_toggle_support(__sysrq_enabled);
 243
 244        return 0;
 245}
 246
 247#endif
 248
 249static struct ctl_table kern_table[];
 250static struct ctl_table vm_table[];
 251static struct ctl_table fs_table[];
 252static struct ctl_table debug_table[];
 253static struct ctl_table dev_table[];
 254extern struct ctl_table random_table[];
 255#ifdef CONFIG_EPOLL
 256extern struct ctl_table epoll_table[];
 257#endif
 258
 259#ifdef CONFIG_FW_LOADER_USER_HELPER
 260extern struct ctl_table firmware_config_table[];
 261#endif
 262
 263#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 264int sysctl_legacy_va_layout;
 265#endif
 266
 267/* The default sysctl tables: */
 268
 269static struct ctl_table sysctl_base_table[] = {
 270        {
 271                .procname       = "kernel",
 272                .mode           = 0555,
 273                .child          = kern_table,
 274        },
 275        {
 276                .procname       = "vm",
 277                .mode           = 0555,
 278                .child          = vm_table,
 279        },
 280        {
 281                .procname       = "fs",
 282                .mode           = 0555,
 283                .child          = fs_table,
 284        },
 285        {
 286                .procname       = "debug",
 287                .mode           = 0555,
 288                .child          = debug_table,
 289        },
 290        {
 291                .procname       = "dev",
 292                .mode           = 0555,
 293                .child          = dev_table,
 294        },
 295        { }
 296};
 297
 298#ifdef CONFIG_SCHED_DEBUG
 299static int min_sched_granularity_ns = 100000;           /* 100 usecs */
 300static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
 301static int min_wakeup_granularity_ns;                   /* 0 usecs */
 302static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
 303#ifdef CONFIG_SMP
 304static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 305static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
 306#endif /* CONFIG_SMP */
 307#endif /* CONFIG_SCHED_DEBUG */
 308
 309#ifdef CONFIG_COMPACTION
 310static int min_extfrag_threshold;
 311static int max_extfrag_threshold = 1000;
 312#endif
 313
 314static struct ctl_table kern_table[] = {
 315        {
 316                .procname       = "sched_child_runs_first",
 317                .data           = &sysctl_sched_child_runs_first,
 318                .maxlen         = sizeof(unsigned int),
 319                .mode           = 0644,
 320                .proc_handler   = proc_dointvec,
 321        },
 322#ifdef CONFIG_SCHED_DEBUG
 323        {
 324                .procname       = "sched_min_granularity_ns",
 325                .data           = &sysctl_sched_min_granularity,
 326                .maxlen         = sizeof(unsigned int),
 327                .mode           = 0644,
 328                .proc_handler   = sched_proc_update_handler,
 329                .extra1         = &min_sched_granularity_ns,
 330                .extra2         = &max_sched_granularity_ns,
 331        },
 332        {
 333                .procname       = "sched_latency_ns",
 334                .data           = &sysctl_sched_latency,
 335                .maxlen         = sizeof(unsigned int),
 336                .mode           = 0644,
 337                .proc_handler   = sched_proc_update_handler,
 338                .extra1         = &min_sched_granularity_ns,
 339                .extra2         = &max_sched_granularity_ns,
 340        },
 341        {
 342                .procname       = "sched_wakeup_granularity_ns",
 343                .data           = &sysctl_sched_wakeup_granularity,
 344                .maxlen         = sizeof(unsigned int),
 345                .mode           = 0644,
 346                .proc_handler   = sched_proc_update_handler,
 347                .extra1         = &min_wakeup_granularity_ns,
 348                .extra2         = &max_wakeup_granularity_ns,
 349        },
 350#ifdef CONFIG_SMP
 351        {
 352                .procname       = "sched_tunable_scaling",
 353                .data           = &sysctl_sched_tunable_scaling,
 354                .maxlen         = sizeof(enum sched_tunable_scaling),
 355                .mode           = 0644,
 356                .proc_handler   = sched_proc_update_handler,
 357                .extra1         = &min_sched_tunable_scaling,
 358                .extra2         = &max_sched_tunable_scaling,
 359        },
 360        {
 361                .procname       = "sched_migration_cost_ns",
 362                .data           = &sysctl_sched_migration_cost,
 363                .maxlen         = sizeof(unsigned int),
 364                .mode           = 0644,
 365                .proc_handler   = proc_dointvec,
 366        },
 367        {
 368                .procname       = "sched_nr_migrate",
 369                .data           = &sysctl_sched_nr_migrate,
 370                .maxlen         = sizeof(unsigned int),
 371                .mode           = 0644,
 372                .proc_handler   = proc_dointvec,
 373        },
 374#ifdef CONFIG_SCHEDSTATS
 375        {
 376                .procname       = "sched_schedstats",
 377                .data           = NULL,
 378                .maxlen         = sizeof(unsigned int),
 379                .mode           = 0644,
 380                .proc_handler   = sysctl_schedstats,
 381                .extra1         = &zero,
 382                .extra2         = &one,
 383        },
 384#endif /* CONFIG_SCHEDSTATS */
 385#endif /* CONFIG_SMP */
 386#ifdef CONFIG_NUMA_BALANCING
 387        {
 388                .procname       = "numa_balancing_scan_delay_ms",
 389                .data           = &sysctl_numa_balancing_scan_delay,
 390                .maxlen         = sizeof(unsigned int),
 391                .mode           = 0644,
 392                .proc_handler   = proc_dointvec,
 393        },
 394        {
 395                .procname       = "numa_balancing_scan_period_min_ms",
 396                .data           = &sysctl_numa_balancing_scan_period_min,
 397                .maxlen         = sizeof(unsigned int),
 398                .mode           = 0644,
 399                .proc_handler   = proc_dointvec,
 400        },
 401        {
 402                .procname       = "numa_balancing_scan_period_max_ms",
 403                .data           = &sysctl_numa_balancing_scan_period_max,
 404                .maxlen         = sizeof(unsigned int),
 405                .mode           = 0644,
 406                .proc_handler   = proc_dointvec,
 407        },
 408        {
 409                .procname       = "numa_balancing_scan_size_mb",
 410                .data           = &sysctl_numa_balancing_scan_size,
 411                .maxlen         = sizeof(unsigned int),
 412                .mode           = 0644,
 413                .proc_handler   = proc_dointvec_minmax,
 414                .extra1         = &one,
 415        },
 416        {
 417                .procname       = "numa_balancing",
 418                .data           = NULL, /* filled in by handler */
 419                .maxlen         = sizeof(unsigned int),
 420                .mode           = 0644,
 421                .proc_handler   = sysctl_numa_balancing,
 422                .extra1         = &zero,
 423                .extra2         = &one,
 424        },
 425#endif /* CONFIG_NUMA_BALANCING */
 426#endif /* CONFIG_SCHED_DEBUG */
 427        {
 428                .procname       = "sched_rt_period_us",
 429                .data           = &sysctl_sched_rt_period,
 430                .maxlen         = sizeof(unsigned int),
 431                .mode           = 0644,
 432                .proc_handler   = sched_rt_handler,
 433        },
 434        {
 435                .procname       = "sched_rt_runtime_us",
 436                .data           = &sysctl_sched_rt_runtime,
 437                .maxlen         = sizeof(int),
 438                .mode           = 0644,
 439                .proc_handler   = sched_rt_handler,
 440        },
 441        {
 442                .procname       = "sched_rr_timeslice_ms",
 443                .data           = &sysctl_sched_rr_timeslice,
 444                .maxlen         = sizeof(int),
 445                .mode           = 0644,
 446                .proc_handler   = sched_rr_handler,
 447        },
 448#ifdef CONFIG_SCHED_AUTOGROUP
 449        {
 450                .procname       = "sched_autogroup_enabled",
 451                .data           = &sysctl_sched_autogroup_enabled,
 452                .maxlen         = sizeof(unsigned int),
 453                .mode           = 0644,
 454                .proc_handler   = proc_dointvec_minmax,
 455                .extra1         = &zero,
 456                .extra2         = &one,
 457        },
 458#endif
 459#ifdef CONFIG_CFS_BANDWIDTH
 460        {
 461                .procname       = "sched_cfs_bandwidth_slice_us",
 462                .data           = &sysctl_sched_cfs_bandwidth_slice,
 463                .maxlen         = sizeof(unsigned int),
 464                .mode           = 0644,
 465                .proc_handler   = proc_dointvec_minmax,
 466                .extra1         = &one,
 467        },
 468#endif
 469#ifdef CONFIG_PROVE_LOCKING
 470        {
 471                .procname       = "prove_locking",
 472                .data           = &prove_locking,
 473                .maxlen         = sizeof(int),
 474                .mode           = 0644,
 475                .proc_handler   = proc_dointvec,
 476        },
 477#endif
 478#ifdef CONFIG_LOCK_STAT
 479        {
 480                .procname       = "lock_stat",
 481                .data           = &lock_stat,
 482                .maxlen         = sizeof(int),
 483                .mode           = 0644,
 484                .proc_handler   = proc_dointvec,
 485        },
 486#endif
 487        {
 488                .procname       = "panic",
 489                .data           = &panic_timeout,
 490                .maxlen         = sizeof(int),
 491                .mode           = 0644,
 492                .proc_handler   = proc_dointvec,
 493        },
 494#ifdef CONFIG_COREDUMP
 495        {
 496                .procname       = "core_uses_pid",
 497                .data           = &core_uses_pid,
 498                .maxlen         = sizeof(int),
 499                .mode           = 0644,
 500                .proc_handler   = proc_dointvec,
 501        },
 502        {
 503                .procname       = "core_pattern",
 504                .data           = core_pattern,
 505                .maxlen         = CORENAME_MAX_SIZE,
 506                .mode           = 0644,
 507                .proc_handler   = proc_dostring_coredump,
 508        },
 509        {
 510                .procname       = "core_pipe_limit",
 511                .data           = &core_pipe_limit,
 512                .maxlen         = sizeof(unsigned int),
 513                .mode           = 0644,
 514                .proc_handler   = proc_dointvec,
 515        },
 516#endif
 517#ifdef CONFIG_PROC_SYSCTL
 518        {
 519                .procname       = "tainted",
 520                .maxlen         = sizeof(long),
 521                .mode           = 0644,
 522                .proc_handler   = proc_taint,
 523        },
 524        {
 525                .procname       = "sysctl_writes_strict",
 526                .data           = &sysctl_writes_strict,
 527                .maxlen         = sizeof(int),
 528                .mode           = 0644,
 529                .proc_handler   = proc_dointvec_minmax,
 530                .extra1         = &neg_one,
 531                .extra2         = &one,
 532        },
 533#endif
 534#ifdef CONFIG_LATENCYTOP
 535        {
 536                .procname       = "latencytop",
 537                .data           = &latencytop_enabled,
 538                .maxlen         = sizeof(int),
 539                .mode           = 0644,
 540                .proc_handler   = sysctl_latencytop,
 541        },
 542#endif
 543#ifdef CONFIG_BLK_DEV_INITRD
 544        {
 545                .procname       = "real-root-dev",
 546                .data           = &real_root_dev,
 547                .maxlen         = sizeof(int),
 548                .mode           = 0644,
 549                .proc_handler   = proc_dointvec,
 550        },
 551#endif
 552        {
 553                .procname       = "print-fatal-signals",
 554                .data           = &print_fatal_signals,
 555                .maxlen         = sizeof(int),
 556                .mode           = 0644,
 557                .proc_handler   = proc_dointvec,
 558        },
 559#ifdef CONFIG_SPARC
 560        {
 561                .procname       = "reboot-cmd",
 562                .data           = reboot_command,
 563                .maxlen         = 256,
 564                .mode           = 0644,
 565                .proc_handler   = proc_dostring,
 566        },
 567        {
 568                .procname       = "stop-a",
 569                .data           = &stop_a_enabled,
 570                .maxlen         = sizeof (int),
 571                .mode           = 0644,
 572                .proc_handler   = proc_dointvec,
 573        },
 574        {
 575                .procname       = "scons-poweroff",
 576                .data           = &scons_pwroff,
 577                .maxlen         = sizeof (int),
 578                .mode           = 0644,
 579                .proc_handler   = proc_dointvec,
 580        },
 581#endif
 582#ifdef CONFIG_SPARC64
 583        {
 584                .procname       = "tsb-ratio",
 585                .data           = &sysctl_tsb_ratio,
 586                .maxlen         = sizeof (int),
 587                .mode           = 0644,
 588                .proc_handler   = proc_dointvec,
 589        },
 590#endif
 591#ifdef __hppa__
 592        {
 593                .procname       = "soft-power",
 594                .data           = &pwrsw_enabled,
 595                .maxlen         = sizeof (int),
 596                .mode           = 0644,
 597                .proc_handler   = proc_dointvec,
 598        },
 599#endif
 600#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 601        {
 602                .procname       = "unaligned-trap",
 603                .data           = &unaligned_enabled,
 604                .maxlen         = sizeof (int),
 605                .mode           = 0644,
 606                .proc_handler   = proc_dointvec,
 607        },
 608#endif
 609        {
 610                .procname       = "ctrl-alt-del",
 611                .data           = &C_A_D,
 612                .maxlen         = sizeof(int),
 613                .mode           = 0644,
 614                .proc_handler   = proc_dointvec,
 615        },
 616#ifdef CONFIG_FUNCTION_TRACER
 617        {
 618                .procname       = "ftrace_enabled",
 619                .data           = &ftrace_enabled,
 620                .maxlen         = sizeof(int),
 621                .mode           = 0644,
 622                .proc_handler   = ftrace_enable_sysctl,
 623        },
 624#endif
 625#ifdef CONFIG_STACK_TRACER
 626        {
 627                .procname       = "stack_tracer_enabled",
 628                .data           = &stack_tracer_enabled,
 629                .maxlen         = sizeof(int),
 630                .mode           = 0644,
 631                .proc_handler   = stack_trace_sysctl,
 632        },
 633#endif
 634#ifdef CONFIG_TRACING
 635        {
 636                .procname       = "ftrace_dump_on_oops",
 637                .data           = &ftrace_dump_on_oops,
 638                .maxlen         = sizeof(int),
 639                .mode           = 0644,
 640                .proc_handler   = proc_dointvec,
 641        },
 642        {
 643                .procname       = "traceoff_on_warning",
 644                .data           = &__disable_trace_on_warning,
 645                .maxlen         = sizeof(__disable_trace_on_warning),
 646                .mode           = 0644,
 647                .proc_handler   = proc_dointvec,
 648        },
 649        {
 650                .procname       = "tracepoint_printk",
 651                .data           = &tracepoint_printk,
 652                .maxlen         = sizeof(tracepoint_printk),
 653                .mode           = 0644,
 654                .proc_handler   = tracepoint_printk_sysctl,
 655        },
 656#endif
 657#ifdef CONFIG_KEXEC_CORE
 658        {
 659                .procname       = "kexec_load_disabled",
 660                .data           = &kexec_load_disabled,
 661                .maxlen         = sizeof(int),
 662                .mode           = 0644,
 663                /* only handle a transition from default "0" to "1" */
 664                .proc_handler   = proc_dointvec_minmax,
 665                .extra1         = &one,
 666                .extra2         = &one,
 667        },
 668#endif
 669#ifdef CONFIG_MODULES
 670        {
 671                .procname       = "modprobe",
 672                .data           = &modprobe_path,
 673                .maxlen         = KMOD_PATH_LEN,
 674                .mode           = 0644,
 675                .proc_handler   = proc_dostring,
 676        },
 677        {
 678                .procname       = "modules_disabled",
 679                .data           = &modules_disabled,
 680                .maxlen         = sizeof(int),
 681                .mode           = 0644,
 682                /* only handle a transition from default "0" to "1" */
 683                .proc_handler   = proc_dointvec_minmax,
 684                .extra1         = &one,
 685                .extra2         = &one,
 686        },
 687#endif
 688#ifdef CONFIG_UEVENT_HELPER
 689        {
 690                .procname       = "hotplug",
 691                .data           = &uevent_helper,
 692                .maxlen         = UEVENT_HELPER_PATH_LEN,
 693                .mode           = 0644,
 694                .proc_handler   = proc_dostring,
 695        },
 696#endif
 697#ifdef CONFIG_CHR_DEV_SG
 698        {
 699                .procname       = "sg-big-buff",
 700                .data           = &sg_big_buff,
 701                .maxlen         = sizeof (int),
 702                .mode           = 0444,
 703                .proc_handler   = proc_dointvec,
 704        },
 705#endif
 706#ifdef CONFIG_BSD_PROCESS_ACCT
 707        {
 708                .procname       = "acct",
 709                .data           = &acct_parm,
 710                .maxlen         = 3*sizeof(int),
 711                .mode           = 0644,
 712                .proc_handler   = proc_dointvec,
 713        },
 714#endif
 715#ifdef CONFIG_MAGIC_SYSRQ
 716        {
 717                .procname       = "sysrq",
 718                .data           = &__sysrq_enabled,
 719                .maxlen         = sizeof (int),
 720                .mode           = 0644,
 721                .proc_handler   = sysrq_sysctl_handler,
 722        },
 723#endif
 724#ifdef CONFIG_PROC_SYSCTL
 725        {
 726                .procname       = "cad_pid",
 727                .data           = NULL,
 728                .maxlen         = sizeof (int),
 729                .mode           = 0600,
 730                .proc_handler   = proc_do_cad_pid,
 731        },
 732#endif
 733        {
 734                .procname       = "threads-max",
 735                .data           = NULL,
 736                .maxlen         = sizeof(int),
 737                .mode           = 0644,
 738                .proc_handler   = sysctl_max_threads,
 739        },
 740        {
 741                .procname       = "random",
 742                .mode           = 0555,
 743                .child          = random_table,
 744        },
 745        {
 746                .procname       = "usermodehelper",
 747                .mode           = 0555,
 748                .child          = usermodehelper_table,
 749        },
 750#ifdef CONFIG_FW_LOADER_USER_HELPER
 751        {
 752                .procname       = "firmware_config",
 753                .mode           = 0555,
 754                .child          = firmware_config_table,
 755        },
 756#endif
 757        {
 758                .procname       = "overflowuid",
 759                .data           = &overflowuid,
 760                .maxlen         = sizeof(int),
 761                .mode           = 0644,
 762                .proc_handler   = proc_dointvec_minmax,
 763                .extra1         = &minolduid,
 764                .extra2         = &maxolduid,
 765        },
 766        {
 767                .procname       = "overflowgid",
 768                .data           = &overflowgid,
 769                .maxlen         = sizeof(int),
 770                .mode           = 0644,
 771                .proc_handler   = proc_dointvec_minmax,
 772                .extra1         = &minolduid,
 773                .extra2         = &maxolduid,
 774        },
 775#ifdef CONFIG_S390
 776#ifdef CONFIG_MATHEMU
 777        {
 778                .procname       = "ieee_emulation_warnings",
 779                .data           = &sysctl_ieee_emulation_warnings,
 780                .maxlen         = sizeof(int),
 781                .mode           = 0644,
 782                .proc_handler   = proc_dointvec,
 783        },
 784#endif
 785        {
 786                .procname       = "userprocess_debug",
 787                .data           = &show_unhandled_signals,
 788                .maxlen         = sizeof(int),
 789                .mode           = 0644,
 790                .proc_handler   = proc_dointvec,
 791        },
 792#endif
 793        {
 794                .procname       = "pid_max",
 795                .data           = &pid_max,
 796                .maxlen         = sizeof (int),
 797                .mode           = 0644,
 798                .proc_handler   = proc_dointvec_minmax,
 799                .extra1         = &pid_max_min,
 800                .extra2         = &pid_max_max,
 801        },
 802        {
 803                .procname       = "panic_on_oops",
 804                .data           = &panic_on_oops,
 805                .maxlen         = sizeof(int),
 806                .mode           = 0644,
 807                .proc_handler   = proc_dointvec,
 808        },
 809#if defined CONFIG_PRINTK
 810        {
 811                .procname       = "printk",
 812                .data           = &console_loglevel,
 813                .maxlen         = 4*sizeof(int),
 814                .mode           = 0644,
 815                .proc_handler   = proc_dointvec,
 816        },
 817        {
 818                .procname       = "printk_ratelimit",
 819                .data           = &printk_ratelimit_state.interval,
 820                .maxlen         = sizeof(int),
 821                .mode           = 0644,
 822                .proc_handler   = proc_dointvec_jiffies,
 823        },
 824        {
 825                .procname       = "printk_ratelimit_burst",
 826                .data           = &printk_ratelimit_state.burst,
 827                .maxlen         = sizeof(int),
 828                .mode           = 0644,
 829                .proc_handler   = proc_dointvec,
 830        },
 831        {
 832                .procname       = "printk_delay",
 833                .data           = &printk_delay_msec,
 834                .maxlen         = sizeof(int),
 835                .mode           = 0644,
 836                .proc_handler   = proc_dointvec_minmax,
 837                .extra1         = &zero,
 838                .extra2         = &ten_thousand,
 839        },
 840        {
 841                .procname       = "printk_devkmsg",
 842                .data           = devkmsg_log_str,
 843                .maxlen         = DEVKMSG_STR_MAX_SIZE,
 844                .mode           = 0644,
 845                .proc_handler   = devkmsg_sysctl_set_loglvl,
 846        },
 847        {
 848                .procname       = "dmesg_restrict",
 849                .data           = &dmesg_restrict,
 850                .maxlen         = sizeof(int),
 851                .mode           = 0644,
 852                .proc_handler   = proc_dointvec_minmax_sysadmin,
 853                .extra1         = &zero,
 854                .extra2         = &one,
 855        },
 856        {
 857                .procname       = "kptr_restrict",
 858                .data           = &kptr_restrict,
 859                .maxlen         = sizeof(int),
 860                .mode           = 0644,
 861                .proc_handler   = proc_dointvec_minmax_sysadmin,
 862                .extra1         = &zero,
 863                .extra2         = &two,
 864        },
 865#endif
 866        {
 867                .procname       = "ngroups_max",
 868                .data           = &ngroups_max,
 869                .maxlen         = sizeof (int),
 870                .mode           = 0444,
 871                .proc_handler   = proc_dointvec,
 872        },
 873        {
 874                .procname       = "cap_last_cap",
 875                .data           = (void *)&cap_last_cap,
 876                .maxlen         = sizeof(int),
 877                .mode           = 0444,
 878                .proc_handler   = proc_dointvec,
 879        },
 880#if defined(CONFIG_LOCKUP_DETECTOR)
 881        {
 882                .procname       = "watchdog",
 883                .data           = &watchdog_user_enabled,
 884                .maxlen         = sizeof(int),
 885                .mode           = 0644,
 886                .proc_handler   = proc_watchdog,
 887                .extra1         = &zero,
 888                .extra2         = &one,
 889        },
 890        {
 891                .procname       = "watchdog_thresh",
 892                .data           = &watchdog_thresh,
 893                .maxlen         = sizeof(int),
 894                .mode           = 0644,
 895                .proc_handler   = proc_watchdog_thresh,
 896                .extra1         = &zero,
 897                .extra2         = &sixty,
 898        },
 899        {
 900                .procname       = "nmi_watchdog",
 901                .data           = &nmi_watchdog_user_enabled,
 902                .maxlen         = sizeof(int),
 903                .mode           = NMI_WATCHDOG_SYSCTL_PERM,
 904                .proc_handler   = proc_nmi_watchdog,
 905                .extra1         = &zero,
 906                .extra2         = &one,
 907        },
 908        {
 909                .procname       = "watchdog_cpumask",
 910                .data           = &watchdog_cpumask_bits,
 911                .maxlen         = NR_CPUS,
 912                .mode           = 0644,
 913                .proc_handler   = proc_watchdog_cpumask,
 914        },
 915#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 916        {
 917                .procname       = "soft_watchdog",
 918                .data           = &soft_watchdog_user_enabled,
 919                .maxlen         = sizeof(int),
 920                .mode           = 0644,
 921                .proc_handler   = proc_soft_watchdog,
 922                .extra1         = &zero,
 923                .extra2         = &one,
 924        },
 925        {
 926                .procname       = "softlockup_panic",
 927                .data           = &softlockup_panic,
 928                .maxlen         = sizeof(int),
 929                .mode           = 0644,
 930                .proc_handler   = proc_dointvec_minmax,
 931                .extra1         = &zero,
 932                .extra2         = &one,
 933        },
 934#ifdef CONFIG_SMP
 935        {
 936                .procname       = "softlockup_all_cpu_backtrace",
 937                .data           = &sysctl_softlockup_all_cpu_backtrace,
 938                .maxlen         = sizeof(int),
 939                .mode           = 0644,
 940                .proc_handler   = proc_dointvec_minmax,
 941                .extra1         = &zero,
 942                .extra2         = &one,
 943        },
 944#endif /* CONFIG_SMP */
 945#endif
 946#ifdef CONFIG_HARDLOCKUP_DETECTOR
 947        {
 948                .procname       = "hardlockup_panic",
 949                .data           = &hardlockup_panic,
 950                .maxlen         = sizeof(int),
 951                .mode           = 0644,
 952                .proc_handler   = proc_dointvec_minmax,
 953                .extra1         = &zero,
 954                .extra2         = &one,
 955        },
 956#ifdef CONFIG_SMP
 957        {
 958                .procname       = "hardlockup_all_cpu_backtrace",
 959                .data           = &sysctl_hardlockup_all_cpu_backtrace,
 960                .maxlen         = sizeof(int),
 961                .mode           = 0644,
 962                .proc_handler   = proc_dointvec_minmax,
 963                .extra1         = &zero,
 964                .extra2         = &one,
 965        },
 966#endif /* CONFIG_SMP */
 967#endif
 968#endif
 969
 970#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 971        {
 972                .procname       = "unknown_nmi_panic",
 973                .data           = &unknown_nmi_panic,
 974                .maxlen         = sizeof (int),
 975                .mode           = 0644,
 976                .proc_handler   = proc_dointvec,
 977        },
 978#endif
 979#if defined(CONFIG_X86)
 980        {
 981                .procname       = "panic_on_unrecovered_nmi",
 982                .data           = &panic_on_unrecovered_nmi,
 983                .maxlen         = sizeof(int),
 984                .mode           = 0644,
 985                .proc_handler   = proc_dointvec,
 986        },
 987        {
 988                .procname       = "panic_on_io_nmi",
 989                .data           = &panic_on_io_nmi,
 990                .maxlen         = sizeof(int),
 991                .mode           = 0644,
 992                .proc_handler   = proc_dointvec,
 993        },
 994#ifdef CONFIG_DEBUG_STACKOVERFLOW
 995        {
 996                .procname       = "panic_on_stackoverflow",
 997                .data           = &sysctl_panic_on_stackoverflow,
 998                .maxlen         = sizeof(int),
 999                .mode           = 0644,
1000                .proc_handler   = proc_dointvec,
1001        },
1002#endif
1003        {
1004                .procname       = "bootloader_type",
1005                .data           = &bootloader_type,
1006                .maxlen         = sizeof (int),
1007                .mode           = 0444,
1008                .proc_handler   = proc_dointvec,
1009        },
1010        {
1011                .procname       = "bootloader_version",
1012                .data           = &bootloader_version,
1013                .maxlen         = sizeof (int),
1014                .mode           = 0444,
1015                .proc_handler   = proc_dointvec,
1016        },
1017        {
1018                .procname       = "io_delay_type",
1019                .data           = &io_delay_type,
1020                .maxlen         = sizeof(int),
1021                .mode           = 0644,
1022                .proc_handler   = proc_dointvec,
1023        },
1024#endif
1025#if defined(CONFIG_MMU)
1026        {
1027                .procname       = "randomize_va_space",
1028                .data           = &randomize_va_space,
1029                .maxlen         = sizeof(int),
1030                .mode           = 0644,
1031                .proc_handler   = proc_dointvec,
1032        },
1033#endif
1034#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1035        {
1036                .procname       = "spin_retry",
1037                .data           = &spin_retry,
1038                .maxlen         = sizeof (int),
1039                .mode           = 0644,
1040                .proc_handler   = proc_dointvec,
1041        },
1042#endif
1043#if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1044        {
1045                .procname       = "acpi_video_flags",
1046                .data           = &acpi_realmode_flags,
1047                .maxlen         = sizeof (unsigned long),
1048                .mode           = 0644,
1049                .proc_handler   = proc_doulongvec_minmax,
1050        },
1051#endif
1052#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1053        {
1054                .procname       = "ignore-unaligned-usertrap",
1055                .data           = &no_unaligned_warning,
1056                .maxlen         = sizeof (int),
1057                .mode           = 0644,
1058                .proc_handler   = proc_dointvec,
1059        },
1060#endif
1061#ifdef CONFIG_IA64
1062        {
1063                .procname       = "unaligned-dump-stack",
1064                .data           = &unaligned_dump_stack,
1065                .maxlen         = sizeof (int),
1066                .mode           = 0644,
1067                .proc_handler   = proc_dointvec,
1068        },
1069#endif
1070#ifdef CONFIG_DETECT_HUNG_TASK
1071        {
1072                .procname       = "hung_task_panic",
1073                .data           = &sysctl_hung_task_panic,
1074                .maxlen         = sizeof(int),
1075                .mode           = 0644,
1076                .proc_handler   = proc_dointvec_minmax,
1077                .extra1         = &zero,
1078                .extra2         = &one,
1079        },
1080        {
1081                .procname       = "hung_task_check_count",
1082                .data           = &sysctl_hung_task_check_count,
1083                .maxlen         = sizeof(int),
1084                .mode           = 0644,
1085                .proc_handler   = proc_dointvec_minmax,
1086                .extra1         = &zero,
1087        },
1088        {
1089                .procname       = "hung_task_timeout_secs",
1090                .data           = &sysctl_hung_task_timeout_secs,
1091                .maxlen         = sizeof(unsigned long),
1092                .mode           = 0644,
1093                .proc_handler   = proc_dohung_task_timeout_secs,
1094                .extra2         = &hung_task_timeout_max,
1095        },
1096        {
1097                .procname       = "hung_task_check_interval_secs",
1098                .data           = &sysctl_hung_task_check_interval_secs,
1099                .maxlen         = sizeof(unsigned long),
1100                .mode           = 0644,
1101                .proc_handler   = proc_dohung_task_timeout_secs,
1102                .extra2         = &hung_task_timeout_max,
1103        },
1104        {
1105                .procname       = "hung_task_warnings",
1106                .data           = &sysctl_hung_task_warnings,
1107                .maxlen         = sizeof(int),
1108                .mode           = 0644,
1109                .proc_handler   = proc_dointvec_minmax,
1110                .extra1         = &neg_one,
1111        },
1112#endif
1113#ifdef CONFIG_RT_MUTEXES
1114        {
1115                .procname       = "max_lock_depth",
1116                .data           = &max_lock_depth,
1117                .maxlen         = sizeof(int),
1118                .mode           = 0644,
1119                .proc_handler   = proc_dointvec,
1120        },
1121#endif
1122        {
1123                .procname       = "poweroff_cmd",
1124                .data           = &poweroff_cmd,
1125                .maxlen         = POWEROFF_CMD_PATH_LEN,
1126                .mode           = 0644,
1127                .proc_handler   = proc_dostring,
1128        },
1129#ifdef CONFIG_KEYS
1130        {
1131                .procname       = "keys",
1132                .mode           = 0555,
1133                .child          = key_sysctls,
1134        },
1135#endif
1136#ifdef CONFIG_PERF_EVENTS
1137        /*
1138         * User-space scripts rely on the existence of this file
1139         * as a feature check for perf_events being enabled.
1140         *
1141         * So it's an ABI, do not remove!
1142         */
1143        {
1144                .procname       = "perf_event_paranoid",
1145                .data           = &sysctl_perf_event_paranoid,
1146                .maxlen         = sizeof(sysctl_perf_event_paranoid),
1147                .mode           = 0644,
1148                .proc_handler   = proc_dointvec,
1149        },
1150        {
1151                .procname       = "perf_event_mlock_kb",
1152                .data           = &sysctl_perf_event_mlock,
1153                .maxlen         = sizeof(sysctl_perf_event_mlock),
1154                .mode           = 0644,
1155                .proc_handler   = proc_dointvec,
1156        },
1157        {
1158                .procname       = "perf_event_max_sample_rate",
1159                .data           = &sysctl_perf_event_sample_rate,
1160                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1161                .mode           = 0644,
1162                .proc_handler   = perf_proc_update_handler,
1163                .extra1         = &one,
1164        },
1165        {
1166                .procname       = "perf_cpu_time_max_percent",
1167                .data           = &sysctl_perf_cpu_time_max_percent,
1168                .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1169                .mode           = 0644,
1170                .proc_handler   = perf_cpu_time_max_percent_handler,
1171                .extra1         = &zero,
1172                .extra2         = &one_hundred,
1173        },
1174        {
1175                .procname       = "perf_event_max_stack",
1176                .data           = &sysctl_perf_event_max_stack,
1177                .maxlen         = sizeof(sysctl_perf_event_max_stack),
1178                .mode           = 0644,
1179                .proc_handler   = perf_event_max_stack_handler,
1180                .extra1         = &zero,
1181                .extra2         = &six_hundred_forty_kb,
1182        },
1183        {
1184                .procname       = "perf_event_max_contexts_per_stack",
1185                .data           = &sysctl_perf_event_max_contexts_per_stack,
1186                .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1187                .mode           = 0644,
1188                .proc_handler   = perf_event_max_stack_handler,
1189                .extra1         = &zero,
1190                .extra2         = &one_thousand,
1191        },
1192#endif
1193        {
1194                .procname       = "panic_on_warn",
1195                .data           = &panic_on_warn,
1196                .maxlen         = sizeof(int),
1197                .mode           = 0644,
1198                .proc_handler   = proc_dointvec_minmax,
1199                .extra1         = &zero,
1200                .extra2         = &one,
1201        },
1202#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1203        {
1204                .procname       = "timer_migration",
1205                .data           = &sysctl_timer_migration,
1206                .maxlen         = sizeof(unsigned int),
1207                .mode           = 0644,
1208                .proc_handler   = timer_migration_handler,
1209                .extra1         = &zero,
1210                .extra2         = &one,
1211        },
1212#endif
1213#ifdef CONFIG_BPF_SYSCALL
1214        {
1215                .procname       = "unprivileged_bpf_disabled",
1216                .data           = &sysctl_unprivileged_bpf_disabled,
1217                .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1218                .mode           = 0644,
1219                /* only handle a transition from default "0" to "1" */
1220                .proc_handler   = proc_dointvec_minmax,
1221                .extra1         = &one,
1222                .extra2         = &one,
1223        },
1224#endif
1225#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1226        {
1227                .procname       = "panic_on_rcu_stall",
1228                .data           = &sysctl_panic_on_rcu_stall,
1229                .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1230                .mode           = 0644,
1231                .proc_handler   = proc_dointvec_minmax,
1232                .extra1         = &zero,
1233                .extra2         = &one,
1234        },
1235#endif
1236        { }
1237};
1238
1239static struct ctl_table vm_table[] = {
1240        {
1241                .procname       = "overcommit_memory",
1242                .data           = &sysctl_overcommit_memory,
1243                .maxlen         = sizeof(sysctl_overcommit_memory),
1244                .mode           = 0644,
1245                .proc_handler   = proc_dointvec_minmax,
1246                .extra1         = &zero,
1247                .extra2         = &two,
1248        },
1249        {
1250                .procname       = "panic_on_oom",
1251                .data           = &sysctl_panic_on_oom,
1252                .maxlen         = sizeof(sysctl_panic_on_oom),
1253                .mode           = 0644,
1254                .proc_handler   = proc_dointvec_minmax,
1255                .extra1         = &zero,
1256                .extra2         = &two,
1257        },
1258        {
1259                .procname       = "oom_kill_allocating_task",
1260                .data           = &sysctl_oom_kill_allocating_task,
1261                .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1262                .mode           = 0644,
1263                .proc_handler   = proc_dointvec,
1264        },
1265        {
1266                .procname       = "oom_dump_tasks",
1267                .data           = &sysctl_oom_dump_tasks,
1268                .maxlen         = sizeof(sysctl_oom_dump_tasks),
1269                .mode           = 0644,
1270                .proc_handler   = proc_dointvec,
1271        },
1272        {
1273                .procname       = "overcommit_ratio",
1274                .data           = &sysctl_overcommit_ratio,
1275                .maxlen         = sizeof(sysctl_overcommit_ratio),
1276                .mode           = 0644,
1277                .proc_handler   = overcommit_ratio_handler,
1278        },
1279        {
1280                .procname       = "overcommit_kbytes",
1281                .data           = &sysctl_overcommit_kbytes,
1282                .maxlen         = sizeof(sysctl_overcommit_kbytes),
1283                .mode           = 0644,
1284                .proc_handler   = overcommit_kbytes_handler,
1285        },
1286        {
1287                .procname       = "page-cluster", 
1288                .data           = &page_cluster,
1289                .maxlen         = sizeof(int),
1290                .mode           = 0644,
1291                .proc_handler   = proc_dointvec_minmax,
1292                .extra1         = &zero,
1293        },
1294        {
1295                .procname       = "dirty_background_ratio",
1296                .data           = &dirty_background_ratio,
1297                .maxlen         = sizeof(dirty_background_ratio),
1298                .mode           = 0644,
1299                .proc_handler   = dirty_background_ratio_handler,
1300                .extra1         = &zero,
1301                .extra2         = &one_hundred,
1302        },
1303        {
1304                .procname       = "dirty_background_bytes",
1305                .data           = &dirty_background_bytes,
1306                .maxlen         = sizeof(dirty_background_bytes),
1307                .mode           = 0644,
1308                .proc_handler   = dirty_background_bytes_handler,
1309                .extra1         = &one_ul,
1310        },
1311        {
1312                .procname       = "dirty_ratio",
1313                .data           = &vm_dirty_ratio,
1314                .maxlen         = sizeof(vm_dirty_ratio),
1315                .mode           = 0644,
1316                .proc_handler   = dirty_ratio_handler,
1317                .extra1         = &zero,
1318                .extra2         = &one_hundred,
1319        },
1320        {
1321                .procname       = "dirty_bytes",
1322                .data           = &vm_dirty_bytes,
1323                .maxlen         = sizeof(vm_dirty_bytes),
1324                .mode           = 0644,
1325                .proc_handler   = dirty_bytes_handler,
1326                .extra1         = &dirty_bytes_min,
1327        },
1328        {
1329                .procname       = "dirty_writeback_centisecs",
1330                .data           = &dirty_writeback_interval,
1331                .maxlen         = sizeof(dirty_writeback_interval),
1332                .mode           = 0644,
1333                .proc_handler   = dirty_writeback_centisecs_handler,
1334        },
1335        {
1336                .procname       = "dirty_expire_centisecs",
1337                .data           = &dirty_expire_interval,
1338                .maxlen         = sizeof(dirty_expire_interval),
1339                .mode           = 0644,
1340                .proc_handler   = proc_dointvec_minmax,
1341                .extra1         = &zero,
1342        },
1343        {
1344                .procname       = "dirtytime_expire_seconds",
1345                .data           = &dirtytime_expire_interval,
1346                .maxlen         = sizeof(dirtytime_expire_interval),
1347                .mode           = 0644,
1348                .proc_handler   = dirtytime_interval_handler,
1349                .extra1         = &zero,
1350        },
1351        {
1352                .procname       = "swappiness",
1353                .data           = &vm_swappiness,
1354                .maxlen         = sizeof(vm_swappiness),
1355                .mode           = 0644,
1356                .proc_handler   = proc_dointvec_minmax,
1357                .extra1         = &zero,
1358                .extra2         = &one_hundred,
1359        },
1360#ifdef CONFIG_HUGETLB_PAGE
1361        {
1362                .procname       = "nr_hugepages",
1363                .data           = NULL,
1364                .maxlen         = sizeof(unsigned long),
1365                .mode           = 0644,
1366                .proc_handler   = hugetlb_sysctl_handler,
1367        },
1368#ifdef CONFIG_NUMA
1369        {
1370                .procname       = "nr_hugepages_mempolicy",
1371                .data           = NULL,
1372                .maxlen         = sizeof(unsigned long),
1373                .mode           = 0644,
1374                .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1375        },
1376        {
1377                .procname               = "numa_stat",
1378                .data                   = &sysctl_vm_numa_stat,
1379                .maxlen                 = sizeof(int),
1380                .mode                   = 0644,
1381                .proc_handler   = sysctl_vm_numa_stat_handler,
1382                .extra1                 = &zero,
1383                .extra2                 = &one,
1384        },
1385#endif
1386         {
1387                .procname       = "hugetlb_shm_group",
1388                .data           = &sysctl_hugetlb_shm_group,
1389                .maxlen         = sizeof(gid_t),
1390                .mode           = 0644,
1391                .proc_handler   = proc_dointvec,
1392         },
1393        {
1394                .procname       = "nr_overcommit_hugepages",
1395                .data           = NULL,
1396                .maxlen         = sizeof(unsigned long),
1397                .mode           = 0644,
1398                .proc_handler   = hugetlb_overcommit_handler,
1399        },
1400#endif
1401        {
1402                .procname       = "lowmem_reserve_ratio",
1403                .data           = &sysctl_lowmem_reserve_ratio,
1404                .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1405                .mode           = 0644,
1406                .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1407        },
1408        {
1409                .procname       = "drop_caches",
1410                .data           = &sysctl_drop_caches,
1411                .maxlen         = sizeof(int),
1412                .mode           = 0644,
1413                .proc_handler   = drop_caches_sysctl_handler,
1414                .extra1         = &one,
1415                .extra2         = &four,
1416        },
1417#ifdef CONFIG_COMPACTION
1418        {
1419                .procname       = "compact_memory",
1420                .data           = &sysctl_compact_memory,
1421                .maxlen         = sizeof(int),
1422                .mode           = 0200,
1423                .proc_handler   = sysctl_compaction_handler,
1424        },
1425        {
1426                .procname       = "extfrag_threshold",
1427                .data           = &sysctl_extfrag_threshold,
1428                .maxlen         = sizeof(int),
1429                .mode           = 0644,
1430                .proc_handler   = sysctl_extfrag_handler,
1431                .extra1         = &min_extfrag_threshold,
1432                .extra2         = &max_extfrag_threshold,
1433        },
1434        {
1435                .procname       = "compact_unevictable_allowed",
1436                .data           = &sysctl_compact_unevictable_allowed,
1437                .maxlen         = sizeof(int),
1438                .mode           = 0644,
1439                .proc_handler   = proc_dointvec,
1440                .extra1         = &zero,
1441                .extra2         = &one,
1442        },
1443
1444#endif /* CONFIG_COMPACTION */
1445        {
1446                .procname       = "min_free_kbytes",
1447                .data           = &min_free_kbytes,
1448                .maxlen         = sizeof(min_free_kbytes),
1449                .mode           = 0644,
1450                .proc_handler   = min_free_kbytes_sysctl_handler,
1451                .extra1         = &zero,
1452        },
1453        {
1454                .procname       = "watermark_scale_factor",
1455                .data           = &watermark_scale_factor,
1456                .maxlen         = sizeof(watermark_scale_factor),
1457                .mode           = 0644,
1458                .proc_handler   = watermark_scale_factor_sysctl_handler,
1459                .extra1         = &one,
1460                .extra2         = &one_thousand,
1461        },
1462        {
1463                .procname       = "percpu_pagelist_fraction",
1464                .data           = &percpu_pagelist_fraction,
1465                .maxlen         = sizeof(percpu_pagelist_fraction),
1466                .mode           = 0644,
1467                .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1468                .extra1         = &zero,
1469        },
1470#ifdef CONFIG_MMU
1471        {
1472                .procname       = "max_map_count",
1473                .data           = &sysctl_max_map_count,
1474                .maxlen         = sizeof(sysctl_max_map_count),
1475                .mode           = 0644,
1476                .proc_handler   = proc_dointvec_minmax,
1477                .extra1         = &zero,
1478        },
1479#else
1480        {
1481                .procname       = "nr_trim_pages",
1482                .data           = &sysctl_nr_trim_pages,
1483                .maxlen         = sizeof(sysctl_nr_trim_pages),
1484                .mode           = 0644,
1485                .proc_handler   = proc_dointvec_minmax,
1486                .extra1         = &zero,
1487        },
1488#endif
1489        {
1490                .procname       = "laptop_mode",
1491                .data           = &laptop_mode,
1492                .maxlen         = sizeof(laptop_mode),
1493                .mode           = 0644,
1494                .proc_handler   = proc_dointvec_jiffies,
1495        },
1496        {
1497                .procname       = "block_dump",
1498                .data           = &block_dump,
1499                .maxlen         = sizeof(block_dump),
1500                .mode           = 0644,
1501                .proc_handler   = proc_dointvec,
1502                .extra1         = &zero,
1503        },
1504        {
1505                .procname       = "vfs_cache_pressure",
1506                .data           = &sysctl_vfs_cache_pressure,
1507                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1508                .mode           = 0644,
1509                .proc_handler   = proc_dointvec,
1510                .extra1         = &zero,
1511        },
1512#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1513        {
1514                .procname       = "legacy_va_layout",
1515                .data           = &sysctl_legacy_va_layout,
1516                .maxlen         = sizeof(sysctl_legacy_va_layout),
1517                .mode           = 0644,
1518                .proc_handler   = proc_dointvec,
1519                .extra1         = &zero,
1520        },
1521#endif
1522#ifdef CONFIG_NUMA
1523        {
1524                .procname       = "zone_reclaim_mode",
1525                .data           = &node_reclaim_mode,
1526                .maxlen         = sizeof(node_reclaim_mode),
1527                .mode           = 0644,
1528                .proc_handler   = proc_dointvec,
1529                .extra1         = &zero,
1530        },
1531        {
1532                .procname       = "min_unmapped_ratio",
1533                .data           = &sysctl_min_unmapped_ratio,
1534                .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1535                .mode           = 0644,
1536                .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1537                .extra1         = &zero,
1538                .extra2         = &one_hundred,
1539        },
1540        {
1541                .procname       = "min_slab_ratio",
1542                .data           = &sysctl_min_slab_ratio,
1543                .maxlen         = sizeof(sysctl_min_slab_ratio),
1544                .mode           = 0644,
1545                .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1546                .extra1         = &zero,
1547                .extra2         = &one_hundred,
1548        },
1549#endif
1550#ifdef CONFIG_SMP
1551        {
1552                .procname       = "stat_interval",
1553                .data           = &sysctl_stat_interval,
1554                .maxlen         = sizeof(sysctl_stat_interval),
1555                .mode           = 0644,
1556                .proc_handler   = proc_dointvec_jiffies,
1557        },
1558        {
1559                .procname       = "stat_refresh",
1560                .data           = NULL,
1561                .maxlen         = 0,
1562                .mode           = 0600,
1563                .proc_handler   = vmstat_refresh,
1564        },
1565#endif
1566#ifdef CONFIG_MMU
1567        {
1568                .procname       = "mmap_min_addr",
1569                .data           = &dac_mmap_min_addr,
1570                .maxlen         = sizeof(unsigned long),
1571                .mode           = 0644,
1572                .proc_handler   = mmap_min_addr_handler,
1573        },
1574#endif
1575#ifdef CONFIG_NUMA
1576        {
1577                .procname       = "numa_zonelist_order",
1578                .data           = &numa_zonelist_order,
1579                .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1580                .mode           = 0644,
1581                .proc_handler   = numa_zonelist_order_handler,
1582        },
1583#endif
1584#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1585   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1586        {
1587                .procname       = "vdso_enabled",
1588#ifdef CONFIG_X86_32
1589                .data           = &vdso32_enabled,
1590                .maxlen         = sizeof(vdso32_enabled),
1591#else
1592                .data           = &vdso_enabled,
1593                .maxlen         = sizeof(vdso_enabled),
1594#endif
1595                .mode           = 0644,
1596                .proc_handler   = proc_dointvec,
1597                .extra1         = &zero,
1598        },
1599#endif
1600#ifdef CONFIG_HIGHMEM
1601        {
1602                .procname       = "highmem_is_dirtyable",
1603                .data           = &vm_highmem_is_dirtyable,
1604                .maxlen         = sizeof(vm_highmem_is_dirtyable),
1605                .mode           = 0644,
1606                .proc_handler   = proc_dointvec_minmax,
1607                .extra1         = &zero,
1608                .extra2         = &one,
1609        },
1610#endif
1611#ifdef CONFIG_MEMORY_FAILURE
1612        {
1613                .procname       = "memory_failure_early_kill",
1614                .data           = &sysctl_memory_failure_early_kill,
1615                .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1616                .mode           = 0644,
1617                .proc_handler   = proc_dointvec_minmax,
1618                .extra1         = &zero,
1619                .extra2         = &one,
1620        },
1621        {
1622                .procname       = "memory_failure_recovery",
1623                .data           = &sysctl_memory_failure_recovery,
1624                .maxlen         = sizeof(sysctl_memory_failure_recovery),
1625                .mode           = 0644,
1626                .proc_handler   = proc_dointvec_minmax,
1627                .extra1         = &zero,
1628                .extra2         = &one,
1629        },
1630#endif
1631        {
1632                .procname       = "user_reserve_kbytes",
1633                .data           = &sysctl_user_reserve_kbytes,
1634                .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1635                .mode           = 0644,
1636                .proc_handler   = proc_doulongvec_minmax,
1637        },
1638        {
1639                .procname       = "admin_reserve_kbytes",
1640                .data           = &sysctl_admin_reserve_kbytes,
1641                .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1642                .mode           = 0644,
1643                .proc_handler   = proc_doulongvec_minmax,
1644        },
1645#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1646        {
1647                .procname       = "mmap_rnd_bits",
1648                .data           = &mmap_rnd_bits,
1649                .maxlen         = sizeof(mmap_rnd_bits),
1650                .mode           = 0600,
1651                .proc_handler   = proc_dointvec_minmax,
1652                .extra1         = (void *)&mmap_rnd_bits_min,
1653                .extra2         = (void *)&mmap_rnd_bits_max,
1654        },
1655#endif
1656#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1657        {
1658                .procname       = "mmap_rnd_compat_bits",
1659                .data           = &mmap_rnd_compat_bits,
1660                .maxlen         = sizeof(mmap_rnd_compat_bits),
1661                .mode           = 0600,
1662                .proc_handler   = proc_dointvec_minmax,
1663                .extra1         = (void *)&mmap_rnd_compat_bits_min,
1664                .extra2         = (void *)&mmap_rnd_compat_bits_max,
1665        },
1666#endif
1667        { }
1668};
1669
/*
 * Sysctl entries for filesystem-related tunables and statistics.
 *
 * Each element names an entry (.procname), the kernel object backing it
 * (.data / .maxlen), its permission bits (.mode) and the handler that
 * parses or formats the value (.proc_handler).  Entries that set .child
 * instead of .data point at a nested table.  The array is terminated by
 * an empty entry.
 *
 * NOTE(review): .extra1/.extra2 are presumably the lower/upper bounds
 * consumed by the *_minmax handlers -- confirm against those handlers.
 */
1670static struct ctl_table fs_table[] = {
        /*
         * "inode-nr" and "inode-state" are both backed by inodes_stat and
         * share a handler; they differ only in how many longs they expose
         * (2 vs. 7).
         */
1671        {
1672                .procname       = "inode-nr",
1673                .data           = &inodes_stat,
1674                .maxlen         = 2*sizeof(long),
1675                .mode           = 0444,
1676                .proc_handler   = proc_nr_inodes,
1677        },
1678        {
1679                .procname       = "inode-state",
1680                .data           = &inodes_stat,
1681                .maxlen         = 7*sizeof(long),
1682                .mode           = 0444,
1683                .proc_handler   = proc_nr_inodes,
1684        },
1685        {
1686                .procname       = "file-nr",
1687                .data           = &files_stat,
1688                .maxlen         = sizeof(files_stat),
1689                .mode           = 0444,
1690                .proc_handler   = proc_nr_files,
1691        },
        /* No .extra1/.extra2 are supplied for "file-max". */
1692        {
1693                .procname       = "file-max",
1694                .data           = &files_stat.max_files,
1695                .maxlen         = sizeof(files_stat.max_files),
1696                .mode           = 0644,
1697                .proc_handler   = proc_doulongvec_minmax,
1698        },
        /*
         * NOTE(review): .maxlen is sizeof(unsigned int) but the handler is
         * the signed-int proc_dointvec_minmax -- confirm the intended range.
         */
1699        {
1700                .procname       = "nr_open",
1701                .data           = &sysctl_nr_open,
1702                .maxlen         = sizeof(unsigned int),
1703                .mode           = 0644,
1704                .proc_handler   = proc_dointvec_minmax,
1705                .extra1         = &sysctl_nr_open_min,
1706                .extra2         = &sysctl_nr_open_max,
1707        },
1708        {
1709                .procname       = "dentry-state",
1710                .data           = &dentry_stat,
1711                .maxlen         = 6*sizeof(long),
1712                .mode           = 0444,
1713                .proc_handler   = proc_nr_dentry,
1714        },
        /* overflowuid and overflowgid share the minolduid/maxolduid bounds. */
1715        {
1716                .procname       = "overflowuid",
1717                .data           = &fs_overflowuid,
1718                .maxlen         = sizeof(int),
1719                .mode           = 0644,
1720                .proc_handler   = proc_dointvec_minmax,
1721                .extra1         = &minolduid,
1722                .extra2         = &maxolduid,
1723        },
1724        {
1725                .procname       = "overflowgid",
1726                .data           = &fs_overflowgid,
1727                .maxlen         = sizeof(int),
1728                .mode           = 0644,
1729                .proc_handler   = proc_dointvec_minmax,
1730                .extra1         = &minolduid,
1731                .extra2         = &maxolduid,
1732        },
1733#ifdef CONFIG_FILE_LOCKING
1734        {
1735                .procname       = "leases-enable",
1736                .data           = &leases_enable,
1737                .maxlen         = sizeof(int),
1738                .mode           = 0644,
1739                .proc_handler   = proc_dointvec,
1740        },
1741#endif /* CONFIG_FILE_LOCKING */
1742#ifdef CONFIG_DNOTIFY
1743        {
1744                .procname       = "dir-notify-enable",
1745                .data           = &dir_notify_enable,
1746                .maxlen         = sizeof(int),
1747                .mode           = 0644,
1748                .proc_handler   = proc_dointvec,
1749        },
1750#endif /* CONFIG_DNOTIFY */
        /*
         * Everything up to the matching "#endif CONFIG_MMU" below is built
         * only when CONFIG_MMU is set, in addition to each entry's own
         * config guard.
         */
1751#ifdef CONFIG_MMU
1752#ifdef CONFIG_FILE_LOCKING
1753        {
1754                .procname       = "lease-break-time",
1755                .data           = &lease_break_time,
1756                .maxlen         = sizeof(int),
1757                .mode           = 0644,
1758                .proc_handler   = proc_dointvec,
1759        },
1760#endif /* CONFIG_FILE_LOCKING */
1761#ifdef CONFIG_AIO
1762        {
1763                .procname       = "aio-nr",
1764                .data           = &aio_nr,
1765                .maxlen         = sizeof(aio_nr),
1766                .mode           = 0444,
1767                .proc_handler   = proc_doulongvec_minmax,
1768        },
1769        {
1770                .procname       = "aio-max-nr",
1771                .data           = &aio_max_nr,
1772                .maxlen         = sizeof(aio_max_nr),
1773                .mode           = 0644,
1774                .proc_handler   = proc_doulongvec_minmax,
1775        },
1776#endif /* CONFIG_AIO */
1777#ifdef CONFIG_INOTIFY_USER
1778        {
1779                .procname       = "inotify",
1780                .mode           = 0555,
1781                .child          = inotify_table,
1782        },
1783#endif  /* CONFIG_INOTIFY_USER */
1784#ifdef CONFIG_EPOLL
1785        {
1786                .procname       = "epoll",
1787                .mode           = 0555,
1788                .child          = epoll_table,
1789        },
1790#endif /* CONFIG_EPOLL */
1791#endif /* CONFIG_MMU */
        /* The four protected_* entries are mode 0600 (owner read/write only). */
1792        {
1793                .procname       = "protected_symlinks",
1794                .data           = &sysctl_protected_symlinks,
1795                .maxlen         = sizeof(int),
1796                .mode           = 0600,
1797                .proc_handler   = proc_dointvec_minmax,
1798                .extra1         = &zero,
1799                .extra2         = &one,
1800        },
1801        {
1802                .procname       = "protected_hardlinks",
1803                .data           = &sysctl_protected_hardlinks,
1804                .maxlen         = sizeof(int),
1805                .mode           = 0600,
1806                .proc_handler   = proc_dointvec_minmax,
1807                .extra1         = &zero,
1808                .extra2         = &one,
1809        },
1810        {
1811                .procname       = "protected_fifos",
1812                .data           = &sysctl_protected_fifos,
1813                .maxlen         = sizeof(int),
1814                .mode           = 0600,
1815                .proc_handler   = proc_dointvec_minmax,
1816                .extra1         = &zero,
1817                .extra2         = &two,
1818        },
1819        {
1820                .procname       = "protected_regular",
1821                .data           = &sysctl_protected_regular,
1822                .maxlen         = sizeof(int),
1823                .mode           = 0600,
1824                .proc_handler   = proc_dointvec_minmax,
1825                .extra1         = &zero,
1826                .extra2         = &two,
1827        },
1828        {
1829                .procname       = "suid_dumpable",
1830                .data           = &suid_dumpable,
1831                .maxlen         = sizeof(int),
1832                .mode           = 0644,
1833                .proc_handler   = proc_dointvec_minmax_coredump,
1834                .extra1         = &zero,
1835                .extra2         = &two,
1836        },
1837#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1838        {
1839                .procname       = "binfmt_misc",
1840                .mode           = 0555,
1841                .child          = sysctl_mount_point,
1842        },
1843#endif /* CONFIG_BINFMT_MISC || CONFIG_BINFMT_MISC_MODULE */
1844        {
1845                .procname       = "pipe-max-size",
1846                .data           = &pipe_max_size,
1847                .maxlen         = sizeof(pipe_max_size),
1848                .mode           = 0644,
1849                .proc_handler   = proc_dopipe_max_size,
1850        },
1851        {
1852                .procname       = "pipe-user-pages-hard",
1853                .data           = &pipe_user_pages_hard,
1854                .maxlen         = sizeof(pipe_user_pages_hard),
1855                .mode           = 0644,
1856                .proc_handler   = proc_doulongvec_minmax,
1857        },
1858        {
1859                .procname       = "pipe-user-pages-soft",
1860                .data           = &pipe_user_pages_soft,
1861                .maxlen         = sizeof(pipe_user_pages_soft),
1862                .mode           = 0644,
1863                .proc_handler   = proc_doulongvec_minmax,
1864        },
        /* Only .extra1 is set for "mount-max"; no .extra2 is supplied. */
1865        {
1866                .procname       = "mount-max",
1867                .data           = &sysctl_mount_max,
1868                .maxlen         = sizeof(unsigned int),
1869                .mode           = 0644,
1870                .proc_handler   = proc_dointvec_minmax,
1871                .extra1         = &one,
1872        },
        /* Empty terminating entry. */
1873        { }
1874};
1875
1876static struct ctl_table debug_table[] = {
1877#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1878        {
1879                .procname       = "exception-trace",
1880                .data           = &show_unhandled_signals,
1881                .maxlen         = sizeof(int),
1882                .mode           = 0644,
1883                .proc_handler   = proc_dointvec
1884        },
1885#endif
1886#if defined(CONFIG_OPTPROBES)
1887        {
1888                .procname       = "kprobes-optimization",
1889                .data           = &sysctl_kprobes_optimization,
1890                .maxlen         = sizeof(int),
1891                .mode           = 0644,
1892                .proc_handler   = proc_kprobes_optimization_handler,
1893                .extra1         = &zero,
1894                .extra2         = &one,
1895        },
1896#endif
1897        { }
1898};
1899
/* Holds only the empty terminating entry; no sysctls are defined here. */
1900static struct ctl_table dev_table[] = {
1901        { }
1902};
1903
1904int __init sysctl_init(void)
1905{
1906        struct ctl_table_header *hdr;
1907
1908        hdr = register_sysctl_table(sysctl_base_table);
1909        kmemleak_not_leak(hdr);
1910        return 0;
1911}
1912
1913#endif /* CONFIG_SYSCTL */
1914
1915/*
1916 * /proc/sys support
1917 */
1918
1919#ifdef CONFIG_PROC_SYSCTL
1920
1921static int _proc_do_string(char *data, int maxlen, int write,
1922                           char __user *buffer,
1923                           size_t *lenp, loff_t *ppos)
1924{
1925        size_t len;
1926        char __user *p;
1927        char c;
1928
1929        if (!data || !maxlen || !*lenp) {
1930                *lenp = 0;
1931                return 0;
1932        }
1933
1934        if (write) {
1935                if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1936                        /* Only continue writes not past the end of buffer. */
1937                        len = strlen(data);
1938                        if (len > maxlen - 1)
1939                                len = maxlen - 1;
1940
1941                        if (*ppos > len)
1942                                return 0;
1943                        len = *ppos;
1944                } else {
1945                        /* Start writing from beginning of buffer. */
1946                        len = 0;
1947                }
1948
1949                *ppos += *lenp;
1950                p = buffer;
1951                while ((p - buffer) < *lenp && len < maxlen - 1) {
1952                        if (get_user(c, p++))
1953                                return -EFAULT;
1954                        if (c == 0 || c == '\n')
1955                                break;
1956                        data[len++] = c;
1957                }
1958                data[len] = 0;
1959        } else {
1960                len = strlen(data);
1961                if (len > maxlen)
1962                        len = maxlen;
1963
1964                if (*ppos > len) {
1965                        *lenp = 0;
1966                        return 0;
1967                }
1968
1969                data += *ppos;
1970                len  -= *ppos;
1971
1972                if (len > *lenp)
1973                        len = *lenp;
1974                if (len)
1975                        if (copy_to_user(buffer, data, len))
1976                                return -EFAULT;
1977                if (len < *lenp) {
1978                        if (put_user('\n', buffer + len))
1979                                return -EFAULT;
1980                        len++;
1981                }
1982                *lenp = len;
1983                *ppos += len;
1984        }
1985        return 0;
1986}
1987
1988static void warn_sysctl_write(struct ctl_table *table)
1989{
1990        pr_warn_once("%s wrote to %s when file position was not 0!\n"
1991                "This will not be supported in the future. To silence this\n"
1992                "warning, set kernel.sysctl_writes_strict = -1\n",
1993                current->comm, table->procname);
1994}
1995
1996/**
1997 * proc_first_pos_non_zero_ignore - check if first position is allowed
1998 * @ppos: file position
1999 * @table: the sysctl table
2000 *
2001 * Returns true if the first position is non-zero and the sysctl_writes_strict
2002 * mode indicates this is not allowed for numeric input types. String proc
2003 * handlers can ignore the return value.
2004 */
2005static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2006                                           struct ctl_table *table)
2007{
2008        if (!*ppos)
2009                return false;
2010
2011        switch (sysctl_writes_strict) {
2012        case SYSCTL_WRITES_STRICT:
2013                return true;
2014        case SYSCTL_WRITES_WARN:
2015                warn_sysctl_write(table);
2016                return false;
2017        default:
2018                return false;
2019        }
2020}
2021
2022/**
2023 * proc_dostring - read a string sysctl
2024 * @table: the sysctl table
2025 * @write: %TRUE if this is a write to the sysctl file
2026 * @buffer: the user buffer
2027 * @lenp: the size of the user buffer
2028 * @ppos: file position
2029 *
2030 * Reads/writes a string from/to the user buffer. If the kernel
2031 * buffer provided is not large enough to hold the string, the
2032 * string is truncated. The copied string is %NULL-terminated.
2033 * If the string is being read by the user process, it is copied
2034 * and a newline '\n' is added. It is truncated if the buffer is
2035 * not large enough.
2036 *
2037 * Returns 0 on success.
2038 */
2039int proc_dostring(struct ctl_table *table, int write,
2040                  void __user *buffer, size_t *lenp, loff_t *ppos)
2041{
2042        if (write)
2043                proc_first_pos_non_zero_ignore(ppos, table);
2044
2045        return _proc_do_string((char *)(table->data), table->maxlen, write,
2046                               (char __user *)buffer, lenp, ppos);
2047}
2048
/*
 * Advance *@buf to the position returned by skip_spaces() and return how
 * many characters were skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
        char *start = *buf;

        *buf = skip_spaces(start);

        return *buf - start;
}
2057
/*
 * Skip leading occurrences of @v: advance *@buf and decrement *@size for
 * each one, stopping at the first different character or when *@size
 * reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        while (*size && **buf == v) {
                (*size)--;
                (*buf)++;
        }
}
2067
2068#define TMPBUFLEN 22
2069/**
2070 * proc_get_long - reads an ASCII formatted integer from a user buffer
2071 *
2072 * @buf: a kernel buffer
2073 * @size: size of the kernel buffer
2074 * @val: this is where the number will be stored
2075 * @neg: set to %TRUE if number is negative
2076 * @perm_tr: a vector which contains the allowed trailers
2077 * @perm_tr_len: size of the perm_tr vector
2078 * @tr: pointer to store the trailer character
2079 *
2080 * In case of success %0 is returned and @buf and @size are updated with
2081 * the amount of bytes read. If @tr is non-NULL and a trailing
2082 * character exists (size is non-zero after returning from this
2083 * function), @tr is updated with the trailing character.
2084 */
2085static int proc_get_long(char **buf, size_t *size,
2086                          unsigned long *val, bool *neg,
2087                          const char *perm_tr, unsigned perm_tr_len, char *tr)
2088{
2089        int len;
2090        char *p, tmp[TMPBUFLEN];
2091
2092        if (!*size)
2093                return -EINVAL;
2094
2095        len = *size;
2096        if (len > TMPBUFLEN - 1)
2097                len = TMPBUFLEN - 1;
2098
2099        memcpy(tmp, *buf, len);
2100
2101        tmp[len] = 0;
2102        p = tmp;
2103        if (*p == '-' && *size > 1) {
2104                *neg = true;
2105                p++;
2106        } else
2107                *neg = false;
2108        if (!isdigit(*p))
2109                return -EINVAL;
2110
2111        *val = simple_strtoul(p, &p, 0);
2112
2113        len = p - tmp;
2114
2115        /* We don't know if the next char is whitespace thus we may accept
2116         * invalid integers (e.g. 1234...a) or two integers instead of one
2117         * (e.g. 123...1). So lets not allow such large numbers. */
2118        if (len == TMPBUFLEN - 1)
2119                return -EINVAL;
2120
2121        if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2122                return -EINVAL;
2123
2124        if (tr && (len < *size))
2125                *tr = *p;
2126
2127        *buf += len;
2128        *size -= len;
2129
2130        return 0;
2131}
2132
2133/**
2134 * proc_put_long - converts an integer to a decimal ASCII formatted string
2135 *
2136 * @buf: the user buffer
2137 * @size: the size of the user buffer
2138 * @val: the integer to be converted
2139 * @neg: sign of the number, %TRUE for negative
2140 *
2141 * In case of success %0 is returned and @buf and @size are updated with
2142 * the amount of bytes written.
2143 */
2144static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2145                          bool neg)
2146{
2147        int len;
2148        char tmp[TMPBUFLEN], *p = tmp;
2149
2150        sprintf(p, "%s%lu", neg ? "-" : "", val);
2151        len = strlen(tmp);
2152        if (len > *size)
2153                len = *size;
2154        if (copy_to_user(*buf, tmp, len))
2155                return -EFAULT;
2156        *size -= len;
2157        *buf += len;
2158        return 0;
2159}
2160#undef TMPBUFLEN
2161
2162static int proc_put_char(void __user **buf, size_t *size, char c)
2163{
2164        if (*size) {
2165                char __user **buffer = (char __user **)buf;
2166                if (put_user(c, *buffer))
2167                        return -EFAULT;
2168                (*size)--, (*buffer)++;
2169                *buf = *buffer;
2170        }
2171        return 0;
2172}
2173
/*
 * Default converter between the sign/magnitude pair (*@negp, *@lvalp) and
 * the int at @valp.
 *
 * write != 0: store the signed value in *@valp; returns -EINVAL when the
 *             magnitude does not fit into an int.
 * write == 0: split *@valp into *@negp and *@lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        unsigned long limit;

        if (!write) {
                int cur = *valp;

                *negp = cur < 0;
                *lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
                return 0;
        }

        /* A negative value may reach one further from zero than a positive one. */
        limit = (unsigned long) INT_MAX;
        if (*negp)
                limit += 1;

        if (*lvalp > limit)
                return -EINVAL;

        *valp = *negp ? -*lvalp : *lvalp;

        return 0;
}
2200
/*
 * Default converter between the unsigned long at @lvalp and the unsigned
 * int at @valp.
 *
 * write != 0: store *@lvalp in *@valp; returns -EINVAL if it exceeds
 *             UINT_MAX.
 * write == 0: widen *@valp into *@lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
                                  unsigned int *valp,
                                  int write, void *data)
{
        if (!write) {
                unsigned int cur = *valp;

                *lvalp = (unsigned long)cur;
                return 0;
        }

        if (*lvalp > UINT_MAX)
                return -EINVAL;

        *valp = *lvalp;

        return 0;
}
2215
/*
 * Characters accepted directly after a parsed number; handed to
 * proc_get_long() as its permitted-trailer vector.  This is a plain char
 * array, not a string literal, so sizeof() counts exactly these three
 * characters and no NUL terminator.
 */
2216static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2217
2218static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2219                  int write, void __user *buffer,
2220                  size_t *lenp, loff_t *ppos,
2221                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2222                              int write, void *data),
2223                  void *data)
2224{
2225        int *i, vleft, first = 1, err = 0;
2226        size_t left;
2227        char *kbuf = NULL, *p;
2228        
2229        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2230                *lenp = 0;
2231                return 0;
2232        }
2233        
2234        i = (int *) tbl_data;
2235        vleft = table->maxlen / sizeof(*i);
2236        left = *lenp;
2237
2238        if (!conv)
2239                conv = do_proc_dointvec_conv;
2240
2241        if (write) {
2242                if (proc_first_pos_non_zero_ignore(ppos, table))
2243                        goto out;
2244
2245                if (left > PAGE_SIZE - 1)
2246                        left = PAGE_SIZE - 1;
2247                p = kbuf = memdup_user_nul(buffer, left);
2248                if (IS_ERR(kbuf))
2249                        return PTR_ERR(kbuf);
2250        }
2251
2252        for (; left && vleft--; i++, first=0) {
2253                unsigned long lval;
2254                bool neg;
2255
2256                if (write) {
2257                        left -= proc_skip_spaces(&p);
2258
2259                        if (!left)
2260                                break;
2261                        err = proc_get_long(&p, &left, &lval, &neg,
2262                                             proc_wspace_sep,
2263                                             sizeof(proc_wspace_sep), NULL);
2264                        if (err)
2265                                break;
2266                        if (conv(&neg, &lval, i, 1, data)) {
2267                                err = -EINVAL;
2268                                break;
2269                        }
2270                } else {
2271                        if (conv(&neg, &lval, i, 0, data)) {
2272                                err = -EINVAL;
2273                                break;
2274                        }
2275                        if (!first)
2276                                err = proc_put_char(&buffer, &left, '\t');
2277                        if (err)
2278                                break;
2279                        err = proc_put_long(&buffer, &left, lval, neg);
2280                        if (err)
2281                                break;
2282                }
2283        }
2284
2285        if (!write && !first && left && !err)
2286                err = proc_put_char(&buffer, &left, '\n');
2287        if (write && !err && left)
2288                left -= proc_skip_spaces(&p);
2289        if (write) {
2290                kfree(kbuf);
2291                if (first)
2292                        return err ? : -EINVAL;
2293        }
2294        *lenp -= left;
2295out:
2296        *ppos += *lenp;
2297        return err;
2298}
2299
2300static int do_proc_dointvec(struct ctl_table *table, int write,
2301                  void __user *buffer, size_t *lenp, loff_t *ppos,
2302                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2303                              int write, void *data),
2304                  void *data)
2305{
2306        return __do_proc_dointvec(table->data, table, write,
2307                        buffer, lenp, ppos, conv, data);
2308}
2309
2310static int do_proc_douintvec_w(unsigned int *tbl_data,
2311                               struct ctl_table *table,
2312                               void __user *buffer,
2313                               size_t *lenp, loff_t *ppos,
2314                               int (*conv)(unsigned long *lvalp,
2315                                           unsigned int *valp,
2316                                           int write, void *data),
2317                               void *data)
2318{
2319        unsigned long lval;
2320        int err = 0;
2321        size_t left;
2322        bool neg;
2323        char *kbuf = NULL, *p;
2324
2325        left = *lenp;
2326
2327        if (proc_first_pos_non_zero_ignore(ppos, table))
2328                goto bail_early;
2329
2330        if (left > PAGE_SIZE - 1)
2331                left = PAGE_SIZE - 1;
2332
2333        p = kbuf = memdup_user_nul(buffer, left);
2334        if (IS_ERR(kbuf))
2335                return -EINVAL;
2336
2337        left -= proc_skip_spaces(&p);
2338        if (!left) {
2339                err = -EINVAL;
2340                goto out_free;
2341        }
2342
2343        err = proc_get_long(&p, &left, &lval, &neg,
2344                             proc_wspace_sep,
2345                             sizeof(proc_wspace_sep), NULL);
2346        if (err || neg) {
2347                err = -EINVAL;
2348                goto out_free;
2349        }
2350
2351        if (conv(&lval, tbl_data, 1, data)) {
2352                err = -EINVAL;
2353                goto out_free;
2354        }
2355
2356        if (!err && left)
2357                left -= proc_skip_spaces(&p);
2358
2359out_free:
2360        kfree(kbuf);
2361        if (err)
2362                return -EINVAL;
2363
2364        return 0;
2365
2366        /* This is in keeping with old __do_proc_dointvec() */
2367bail_early:
2368        *ppos += *lenp;
2369        return err;
2370}
2371
2372static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2373                               size_t *lenp, loff_t *ppos,
2374                               int (*conv)(unsigned long *lvalp,
2375                                           unsigned int *valp,
2376                                           int write, void *data),
2377                               void *data)
2378{
2379        unsigned long lval;
2380        int err = 0;
2381        size_t left;
2382
2383        left = *lenp;
2384
2385        if (conv(&lval, tbl_data, 0, data)) {
2386                err = -EINVAL;
2387                goto out;
2388        }
2389
2390        err = proc_put_long(&buffer, &left, lval, false);
2391        if (err || !left)
2392                goto out;
2393
2394        err = proc_put_char(&buffer, &left, '\n');
2395
2396out:
2397        *lenp -= left;
2398        *ppos += *lenp;
2399
2400        return err;
2401}
2402
2403static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2404                               int write, void __user *buffer,
2405                               size_t *lenp, loff_t *ppos,
2406                               int (*conv)(unsigned long *lvalp,
2407                                           unsigned int *valp,
2408                                           int write, void *data),
2409                               void *data)
2410{
2411        unsigned int *i, vleft;
2412
2413        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2414                *lenp = 0;
2415                return 0;
2416        }
2417
2418        i = (unsigned int *) tbl_data;
2419        vleft = table->maxlen / sizeof(*i);
2420
2421        /*
2422         * Arrays are not supported, keep this simple. *Do not* add
2423         * support for them.
2424         */
2425        if (vleft != 1) {
2426                *lenp = 0;
2427                return -EINVAL;
2428        }
2429
2430        if (!conv)
2431                conv = do_proc_douintvec_conv;
2432
2433        if (write)
2434                return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2435                                           conv, data);
2436        return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2437}
2438
2439static int do_proc_douintvec(struct ctl_table *table, int write,
2440                             void __user *buffer, size_t *lenp, loff_t *ppos,
2441                             int (*conv)(unsigned long *lvalp,
2442                                         unsigned int *valp,
2443                                         int write, void *data),
2444                             void *data)
2445{
2446        return __do_proc_douintvec(table->data, table, write,
2447                                   buffer, lenp, ppos, conv, data);
2448}
2449
2450/**
2451 * proc_dointvec - read a vector of integers
2452 * @table: the sysctl table
2453 * @write: %TRUE if this is a write to the sysctl file
2454 * @buffer: the user buffer
2455 * @lenp: the size of the user buffer
2456 * @ppos: file position
2457 *
2458 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2459 * values from/to the user buffer, treated as an ASCII string. 
2460 *
2461 * Returns 0 on success.
2462 */
2463int proc_dointvec(struct ctl_table *table, int write,
2464                     void __user *buffer, size_t *lenp, loff_t *ppos)
2465{
2466        return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2467}
2468
2469/**
2470 * proc_douintvec - read a vector of unsigned integers
2471 * @table: the sysctl table
2472 * @write: %TRUE if this is a write to the sysctl file
2473 * @buffer: the user buffer
2474 * @lenp: the size of the user buffer
2475 * @ppos: file position
2476 *
2477 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2478 * values from/to the user buffer, treated as an ASCII string.
2479 *
2480 * Returns 0 on success.
2481 */
2482int proc_douintvec(struct ctl_table *table, int write,
2483                     void __user *buffer, size_t *lenp, loff_t *ppos)
2484{
2485        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2486                                 do_proc_douintvec_conv, NULL);
2487}
2488
2489/*
2490 * Taint values can only be increased
2491 * This means we can safely use a temporary.
2492 */
2493static int proc_taint(struct ctl_table *table, int write,
2494                               void __user *buffer, size_t *lenp, loff_t *ppos)
2495{
2496        struct ctl_table t;
2497        unsigned long tmptaint = get_taint();
2498        int err;
2499
2500        if (write && !capable(CAP_SYS_ADMIN))
2501                return -EPERM;
2502
2503        t = *table;
2504        t.data = &tmptaint;
2505        err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2506        if (err < 0)
2507                return err;
2508
2509        if (write) {
2510                /*
2511                 * Poor man's atomic or. Not worth adding a primitive
2512                 * to everyone's atomic.h for this
2513                 */
2514                int i;
2515                for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2516                        if ((tmptaint >> i) & 1)
2517                                add_taint(i, LOCKDEP_STILL_OK);
2518                }
2519        }
2520
2521        return err;
2522}
2523
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() with an extra permission gate: reads are always
 * forwarded, writes only for callers holding CAP_SYS_ADMIN (-EPERM
 * otherwise).
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (!write || capable(CAP_SYS_ADMIN))
		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	return -EPERM;
}
#endif
2534
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.  A NULL
 * pointer means the corresponding bound is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback for proc_dointvec_minmax().
 *
 * On write, turns the parsed sign (*@negp) and magnitude (*@lvalp) into an
 * int, checks it against the optional bounds in @data (a
 * struct do_proc_dointvec_minmax_conv_param) and only then stores it in
 * *@valp.  On read, splits *@valp back into sign and magnitude.
 *
 * Returns 0 on success, or -EINVAL on write when the magnitude does not
 * fit in an int or the value lies outside the requested range; *@valp is
 * left untouched in that case.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;

	if (write) {
		int val;

		/*
		 * Reject magnitudes that cannot be represented as an int
		 * before narrowing; otherwise an oversized value would be
		 * truncated and could slip through the range check below.
		 */
		if (*negp) {
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;

		if (val < 0) {
			*negp = true;
			/* Negate as unsigned so INT_MIN does not overflow. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2572
2573/**
2574 * proc_dointvec_minmax - read a vector of integers with min/max values
2575 * @table: the sysctl table
2576 * @write: %TRUE if this is a write to the sysctl file
2577 * @buffer: the user buffer
2578 * @lenp: the size of the user buffer
2579 * @ppos: file position
2580 *
2581 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2582 * values from/to the user buffer, treated as an ASCII string.
2583 *
2584 * This routine will ensure the values are within the range specified by
2585 * table->extra1 (min) and table->extra2 (max).
2586 *
2587 * Returns 0 on success or -EINVAL on write when the range check fails.
2588 */
2589int proc_dointvec_minmax(struct ctl_table *table, int write,
2590                  void __user *buffer, size_t *lenp, loff_t *ppos)
2591{
2592        struct do_proc_dointvec_minmax_conv_param param = {
2593                .min = (int *) table->extra1,
2594                .max = (int *) table->extra2,
2595        };
2596        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2597                                do_proc_dointvec_minmax_conv, &param);
2598}
2599
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the lower and upper limits used for range checking by sysctl
 * parameters that go through the proc_douintvec_minmax() handler.  A
 * NULL pointer leaves that side of the range unchecked.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Conversion callback for proc_douintvec_minmax().
 *
 * Read: widen *@valp into *@lvalp.
 * Write: fail with -EINVAL if *@lvalp exceeds UINT_MAX, fail with -ERANGE
 * if it violates the bounds in @data, otherwise store it in *@valp.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *param = data;
	unsigned int val;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	val = *lvalp;

	if (param->min && val < *param->min)
		return -ERANGE;
	if (param->max && val > *param->max)
		return -ERANGE;

	*valp = val;
	return 0;
}
2638
2639/**
2640 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2641 * @table: the sysctl table
2642 * @write: %TRUE if this is a write to the sysctl file
2643 * @buffer: the user buffer
2644 * @lenp: the size of the user buffer
2645 * @ppos: file position
2646 *
2647 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2648 * values from/to the user buffer, treated as an ASCII string. Negative
2649 * strings are not allowed.
2650 *
2651 * This routine will ensure the values are within the range specified by
2652 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2653 * check for UINT_MAX to avoid having to support wrap around uses from
2654 * userspace.
2655 *
2656 * Returns 0 on success or -ERANGE on write when the range check fails.
2657 */
2658int proc_douintvec_minmax(struct ctl_table *table, int write,
2659                          void __user *buffer, size_t *lenp, loff_t *ppos)
2660{
2661        struct do_proc_douintvec_minmax_conv_param param = {
2662                .min = (unsigned int *) table->extra1,
2663                .max = (unsigned int *) table->extra2,
2664        };
2665        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2666                                 do_proc_douintvec_minmax_conv, &param);
2667}
2668
2669static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2670                                        unsigned int *valp,
2671                                        int write, void *data)
2672{
2673        if (write) {
2674                unsigned int val;
2675
2676                val = round_pipe_size(*lvalp);
2677                if (val == 0)
2678                        return -EINVAL;
2679
2680                *valp = val;
2681        } else {
2682                unsigned int val = *valp;
2683                *lvalp = (unsigned long) val;
2684        }
2685
2686        return 0;
2687}
2688
2689static int proc_dopipe_max_size(struct ctl_table *table, int write,
2690                                void __user *buffer, size_t *lenp, loff_t *ppos)
2691{
2692        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2693                                 do_proc_dopipe_max_size_conv, NULL);
2694}
2695
/*
 * Log a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' (absolute path) nor '|' (pipe handler).
 * Compiles to nothing without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2709
2710static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2711                void __user *buffer, size_t *lenp, loff_t *ppos)
2712{
2713        int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2714        if (!error)
2715                validate_coredump_safety();
2716        return error;
2717}
2718
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, on success, by a re-check of the coredump
 * settings via validate_coredump_safety().
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dostring(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
#endif
2729
2730static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2731                                     void __user *buffer,
2732                                     size_t *lenp, loff_t *ppos,
2733                                     unsigned long convmul,
2734                                     unsigned long convdiv)
2735{
2736        unsigned long *i, *min, *max;
2737        int vleft, first = 1, err = 0;
2738        size_t left;
2739        char *kbuf = NULL, *p;
2740
2741        if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2742                *lenp = 0;
2743                return 0;
2744        }
2745
2746        i = (unsigned long *) data;
2747        min = (unsigned long *) table->extra1;
2748        max = (unsigned long *) table->extra2;
2749        vleft = table->maxlen / sizeof(unsigned long);
2750        left = *lenp;
2751
2752        if (write) {
2753                if (proc_first_pos_non_zero_ignore(ppos, table))
2754                        goto out;
2755
2756                if (left > PAGE_SIZE - 1)
2757                        left = PAGE_SIZE - 1;
2758                p = kbuf = memdup_user_nul(buffer, left);
2759                if (IS_ERR(kbuf))
2760                        return PTR_ERR(kbuf);
2761        }
2762
2763        for (; left && vleft--; i++, first = 0) {
2764                unsigned long val;
2765
2766                if (write) {
2767                        bool neg;
2768
2769                        left -= proc_skip_spaces(&p);
2770
2771                        err = proc_get_long(&p, &left, &val, &neg,
2772                                             proc_wspace_sep,
2773                                             sizeof(proc_wspace_sep), NULL);
2774                        if (err)
2775                                break;
2776                        if (neg)
2777                                continue;
2778                        val = convmul * val / convdiv;
2779                        if ((min && val < *min) || (max && val > *max))
2780                                continue;
2781                        *i = val;
2782                } else {
2783                        val = convdiv * (*i) / convmul;
2784                        if (!first) {
2785                                err = proc_put_char(&buffer, &left, '\t');
2786                                if (err)
2787                                        break;
2788                        }
2789                        err = proc_put_long(&buffer, &left, val, false);
2790                        if (err)
2791                                break;
2792                }
2793        }
2794
2795        if (!write && !first && left && !err)
2796                err = proc_put_char(&buffer, &left, '\n');
2797        if (write && !err)
2798                left -= proc_skip_spaces(&p);
2799        if (write) {
2800                kfree(kbuf);
2801                if (first)
2802                        return err ? : -EINVAL;
2803        }
2804        *lenp -= left;
2805out:
2806        *ppos += *lenp;
2807        return err;
2808}
2809
2810static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2811                                     void __user *buffer,
2812                                     size_t *lenp, loff_t *ppos,
2813                                     unsigned long convmul,
2814                                     unsigned long convdiv)
2815{
2816        return __do_proc_doulongvec_minmax(table->data, table, write,
2817                        buffer, lenp, ppos, convmul, convdiv);
2818}
2819
2820/**
2821 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2822 * @table: the sysctl table
2823 * @write: %TRUE if this is a write to the sysctl file
2824 * @buffer: the user buffer
2825 * @lenp: the size of the user buffer
2826 * @ppos: file position
2827 *
2828 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2829 * values from/to the user buffer, treated as an ASCII string.
2830 *
2831 * This routine will ensure the values are within the range specified by
2832 * table->extra1 (min) and table->extra2 (max).
2833 *
2834 * Returns 0 on success.
2835 */
2836int proc_doulongvec_minmax(struct ctl_table *table, int write,
2837                           void __user *buffer, size_t *lenp, loff_t *ppos)
2838{
2839    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2840}
2841
2842/**
2843 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2844 * @table: the sysctl table
2845 * @write: %TRUE if this is a write to the sysctl file
2846 * @buffer: the user buffer
2847 * @lenp: the size of the user buffer
2848 * @ppos: file position
2849 *
2850 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2851 * values from/to the user buffer, treated as an ASCII string. The values
2852 * are treated as milliseconds, and converted to jiffies when they are stored.
2853 *
2854 * This routine will ensure the values are within the range specified by
2855 * table->extra1 (min) and table->extra2 (max).
2856 *
2857 * Returns 0 on success.
2858 */
2859int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2860                                      void __user *buffer,
2861                                      size_t *lenp, loff_t *ppos)
2862{
2863    return do_proc_doulongvec_minmax(table, write, buffer,
2864                                     lenp, ppos, HZ, 1000l);
2865}
2866
2867
2868static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2869                                         int *valp,
2870                                         int write, void *data)
2871{
2872        if (write) {
2873                if (*lvalp > INT_MAX / HZ)
2874                        return 1;
2875                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2876        } else {
2877                int val = *valp;
2878                unsigned long lval;
2879                if (val < 0) {
2880                        *negp = true;
2881                        lval = -(unsigned long)val;
2882                } else {
2883                        *negp = false;
2884                        lval = (unsigned long)val;
2885                }
2886                *lvalp = lval / HZ;
2887        }
2888        return 0;
2889}
2890
2891static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2892                                                int *valp,
2893                                                int write, void *data)
2894{
2895        if (write) {
2896                if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2897                        return 1;
2898                *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2899        } else {
2900                int val = *valp;
2901                unsigned long lval;
2902                if (val < 0) {
2903                        *negp = true;
2904                        lval = -(unsigned long)val;
2905                } else {
2906                        *negp = false;
2907                        lval = (unsigned long)val;
2908                }
2909                *lvalp = jiffies_to_clock_t(lval);
2910        }
2911        return 0;
2912}
2913
2914static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2915                                            int *valp,
2916                                            int write, void *data)
2917{
2918        if (write) {
2919                unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2920
2921                if (jif > INT_MAX)
2922                        return 1;
2923                *valp = (int)jif;
2924        } else {
2925                int val = *valp;
2926                unsigned long lval;
2927                if (val < 0) {
2928                        *negp = true;
2929                        lval = -(unsigned long)val;
2930                } else {
2931                        *negp = false;
2932                        lval = (unsigned long)val;
2933                }
2934                *lvalp = jiffies_to_msecs(lval);
2935        }
2936        return 0;
2937}
2938
2939/**
2940 * proc_dointvec_jiffies - read a vector of integers as seconds
2941 * @table: the sysctl table
2942 * @write: %TRUE if this is a write to the sysctl file
2943 * @buffer: the user buffer
2944 * @lenp: the size of the user buffer
2945 * @ppos: file position
2946 *
2947 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2948 * values from/to the user buffer, treated as an ASCII string. 
2949 * The values read are assumed to be in seconds, and are converted into
2950 * jiffies.
2951 *
2952 * Returns 0 on success.
2953 */
2954int proc_dointvec_jiffies(struct ctl_table *table, int write,
2955                          void __user *buffer, size_t *lenp, loff_t *ppos)
2956{
2957    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2958                            do_proc_dointvec_jiffies_conv,NULL);
2959}
2960
2961/**
2962 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2963 * @table: the sysctl table
2964 * @write: %TRUE if this is a write to the sysctl file
2965 * @buffer: the user buffer
2966 * @lenp: the size of the user buffer
2967 * @ppos: pointer to the file position
2968 *
2969 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2970 * values from/to the user buffer, treated as an ASCII string. 
2971 * The values read are assumed to be in 1/USER_HZ seconds, and 
2972 * are converted into jiffies.
2973 *
2974 * Returns 0 on success.
2975 */
2976int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2977                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2978{
2979    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2980                            do_proc_dointvec_userhz_jiffies_conv,NULL);
2981}
2982
2983/**
2984 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2985 * @table: the sysctl table
2986 * @write: %TRUE if this is a write to the sysctl file
2987 * @buffer: the user buffer
2988 * @lenp: the size of the user buffer
2989 * @ppos: file position
2990 * @ppos: the current position in the file
2991 *
2992 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2993 * values from/to the user buffer, treated as an ASCII string. 
2994 * The values read are assumed to be in 1/1000 seconds, and 
2995 * are converted into jiffies.
2996 *
2997 * Returns 0 on success.
2998 */
2999int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3000                             void __user *buffer, size_t *lenp, loff_t *ppos)
3001{
3002        return do_proc_dointvec(table, write, buffer, lenp, ppos,
3003                                do_proc_dointvec_ms_jiffies_conv, NULL);
3004}
3005
3006static int proc_do_cad_pid(struct ctl_table *table, int write,
3007                           void __user *buffer, size_t *lenp, loff_t *ppos)
3008{
3009        struct pid *new_pid;
3010        pid_t tmp;
3011        int r;
3012
3013        tmp = pid_vnr(cad_pid);
3014
3015        r = __do_proc_dointvec(&tmp, table, write, buffer,
3016                               lenp, ppos, NULL, NULL);
3017        if (r || !write)
3018                return r;
3019
3020        new_pid = find_get_pid(tmp);
3021        if (!new_pid)
3022                return -ESRCH;
3023
3024        put_pid(xchg(&cad_pid, new_pid));
3025        return 0;
3026}
3027
3028/**
3029 * proc_do_large_bitmap - read/write from/to a large bitmap
3030 * @table: the sysctl table
3031 * @write: %TRUE if this is a write to the sysctl file
3032 * @buffer: the user buffer
3033 * @lenp: the size of the user buffer
3034 * @ppos: file position
3035 *
3036 * The bitmap is stored at table->data and the bitmap length (in bits)
3037 * in table->maxlen.
3038 *
3039 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3040 * large bitmaps may be represented in a compact manner. Writing into
3041 * the file will clear the bitmap then update it with the given input.
3042 *
3043 * Returns 0 on success.
3044 */
3045int proc_do_large_bitmap(struct ctl_table *table, int write,
3046                         void __user *buffer, size_t *lenp, loff_t *ppos)
3047{
3048        int err = 0;
3049        bool first = 1;
3050        size_t left = *lenp;
3051        unsigned long bitmap_len = table->maxlen;
3052        unsigned long *bitmap = *(unsigned long **) table->data;
3053        unsigned long *tmp_bitmap = NULL;
3054        char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3055
3056        if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3057                *lenp = 0;
3058                return 0;
3059        }
3060
3061        if (write) {
3062                char *kbuf, *p;
3063
3064                if (left > PAGE_SIZE - 1)
3065                        left = PAGE_SIZE - 1;
3066
3067                p = kbuf = memdup_user_nul(buffer, left);
3068                if (IS_ERR(kbuf))
3069                        return PTR_ERR(kbuf);
3070
3071                tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3072                                     sizeof(unsigned long),
3073                                     GFP_KERNEL);
3074                if (!tmp_bitmap) {
3075                        kfree(kbuf);
3076                        return -ENOMEM;
3077                }
3078                proc_skip_char(&p, &left, '\n');
3079                while (!err && left) {
3080                        unsigned long val_a, val_b;
3081                        bool neg;
3082
3083                        err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3084                                             sizeof(tr_a), &c);
3085                        if (err)
3086                                break;
3087                        if (val_a >= bitmap_len || neg) {
3088                                err = -EINVAL;
3089                                break;
3090                        }
3091
3092                        val_b = val_a;
3093                        if (left) {
3094                                p++;
3095                                left--;
3096                        }
3097
3098                        if (c == '-') {
3099                                err = proc_get_long(&p, &left, &val_b,
3100                                                     &neg, tr_b, sizeof(tr_b),
3101                                                     &c);
3102                                if (err)
3103                                        break;
3104                                if (val_b >= bitmap_len || neg ||
3105                                    val_a > val_b) {
3106                                        err = -EINVAL;
3107                                        break;
3108                                }
3109                                if (left) {
3110                                        p++;
3111                                        left--;
3112                                }
3113                        }
3114
3115                        bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3116                        first = 0;
3117                        proc_skip_char(&p, &left, '\n');
3118                }
3119                kfree(kbuf);
3120        } else {
3121                unsigned long bit_a, bit_b = 0;
3122
3123                while (left) {
3124                        bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3125                        if (bit_a >= bitmap_len)
3126                                break;
3127                        bit_b = find_next_zero_bit(bitmap, bitmap_len,
3128                                                   bit_a + 1) - 1;
3129
3130                        if (!first) {
3131                                err = proc_put_char(&buffer, &left, ',');
3132                                if (err)
3133                                        break;
3134                        }
3135                        err = proc_put_long(&buffer, &left, bit_a, false);
3136                        if (err)
3137                                break;
3138                        if (bit_a != bit_b) {
3139                                err = proc_put_char(&buffer, &left, '-');
3140                                if (err)
3141                                        break;
3142                                err = proc_put_long(&buffer, &left, bit_b, false);
3143                                if (err)
3144                                        break;
3145                        }
3146
3147                        first = 0; bit_b++;
3148                }
3149                if (!err)
3150                        err = proc_put_char(&buffer, &left, '\n');
3151        }
3152
3153        if (!err) {
3154                if (write) {
3155                        if (*ppos)
3156                                bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3157                        else
3158                                bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3159                }
3160                *lenp -= left;
3161                *ppos += *lenp;
3162        }
3163
3164        kfree(tmp_bitmap);
3165        return err;
3166}
3167
3168#else /* CONFIG_PROC_SYSCTL */
3169
3170int proc_dostring(struct ctl_table *table, int write,
3171                  void __user *buffer, size_t *lenp, loff_t *ppos)
3172{
3173        return -ENOSYS;
3174}
3175
3176int proc_dointvec(struct ctl_table *table, int write,
3177                  void __user *buffer, size_t *lenp, loff_t *ppos)
3178{
3179        return -ENOSYS;
3180}
3181
3182int proc_douintvec(struct ctl_table *table, int write,
3183                  void __user *buffer, size_t *lenp, loff_t *ppos)
3184{
3185        return -ENOSYS;
3186}
3187
3188int proc_dointvec_minmax(struct ctl_table *table, int write,
3189                    void __user *buffer, size_t *lenp, loff_t *ppos)
3190{
3191        return -ENOSYS;
3192}
3193
3194int proc_douintvec_minmax(struct ctl_table *table, int write,
3195                          void __user *buffer, size_t *lenp, loff_t *ppos)
3196{
3197        return -ENOSYS;
3198}
3199
3200int proc_dointvec_jiffies(struct ctl_table *table, int write,
3201                    void __user *buffer, size_t *lenp, loff_t *ppos)
3202{
3203        return -ENOSYS;
3204}
3205
3206int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3207                    void __user *buffer, size_t *lenp, loff_t *ppos)
3208{
3209        return -ENOSYS;
3210}
3211
3212int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3213                             void __user *buffer, size_t *lenp, loff_t *ppos)
3214{
3215        return -ENOSYS;
3216}
3217
3218int proc_doulongvec_minmax(struct ctl_table *table, int write,
3219                    void __user *buffer, size_t *lenp, loff_t *ppos)
3220{
3221        return -ENOSYS;
3222}
3223
3224int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3225                                      void __user *buffer,
3226                                      size_t *lenp, loff_t *ppos)
3227{
3228    return -ENOSYS;
3229}
3230
3231
3232#endif /* CONFIG_PROC_SYSCTL */
3233
/*
 * Each handler is defined twice above, once for each setting of
 * CONFIG_PROC_SYSCTL.  Rather than repeating the export after both
 * definitions, all of them are exported here in one place (an
 * exception to the usual placement).
 */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3248