linux/kernel/sysctl.c
<<
>>
Prefs
   1/*
   2 * sysctl.c: General linux system control interface
   3 *
   4 * Begun 24 March 1995, Stephen Tweedie
   5 * Added /proc support, Dec 1995
   6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
   9 * Dynamic registration fixes, Stephen Tweedie.
  10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  12 *  Horn.
  13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  16 *  Wendling.
  17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  18 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/aio.h>
  23#include <linux/mm.h>
  24#include <linux/swap.h>
  25#include <linux/slab.h>
  26#include <linux/sysctl.h>
  27#include <linux/bitmap.h>
  28#include <linux/signal.h>
  29#include <linux/printk.h>
  30#include <linux/proc_fs.h>
  31#include <linux/security.h>
  32#include <linux/ctype.h>
  33#include <linux/kmemleak.h>
  34#include <linux/fs.h>
  35#include <linux/init.h>
  36#include <linux/kernel.h>
  37#include <linux/kobject.h>
  38#include <linux/net.h>
  39#include <linux/sysrq.h>
  40#include <linux/highuid.h>
  41#include <linux/writeback.h>
  42#include <linux/ratelimit.h>
  43#include <linux/compaction.h>
  44#include <linux/hugetlb.h>
  45#include <linux/initrd.h>
  46#include <linux/key.h>
  47#include <linux/times.h>
  48#include <linux/limits.h>
  49#include <linux/dcache.h>
  50#include <linux/dnotify.h>
  51#include <linux/syscalls.h>
  52#include <linux/vmstat.h>
  53#include <linux/nfs_fs.h>
  54#include <linux/acpi.h>
  55#include <linux/reboot.h>
  56#include <linux/ftrace.h>
  57#include <linux/perf_event.h>
  58#include <linux/kprobes.h>
  59#include <linux/pipe_fs_i.h>
  60#include <linux/oom.h>
  61#include <linux/kmod.h>
  62#include <linux/capability.h>
  63#include <linux/binfmts.h>
  64#include <linux/sched/sysctl.h>
  65#include <linux/sched/coredump.h>
  66#include <linux/kexec.h>
  67#include <linux/bpf.h>
  68#include <linux/mount.h>
  69#include <linux/pipe_fs_i.h>
  70
  71#include <linux/uaccess.h>
  72#include <asm/processor.h>
  73
  74#ifdef CONFIG_X86
  75#include <asm/nmi.h>
  76#include <asm/stacktrace.h>
  77#include <asm/io.h>
  78#endif
  79#ifdef CONFIG_SPARC
  80#include <asm/setup.h>
  81#endif
  82#ifdef CONFIG_BSD_PROCESS_ACCT
  83#include <linux/acct.h>
  84#endif
  85#ifdef CONFIG_RT_MUTEXES
  86#include <linux/rtmutex.h>
  87#endif
  88#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
  89#include <linux/lockdep.h>
  90#endif
  91#ifdef CONFIG_CHR_DEV_SG
  92#include <scsi/sg.h>
  93#endif
  94
  95#ifdef CONFIG_LOCKUP_DETECTOR
  96#include <linux/nmi.h>
  97#endif
  98
  99#if defined(CONFIG_SYSCTL)
 100
 101/* External variables not in a header file. */
 102extern int suid_dumpable;
 103#ifdef CONFIG_COREDUMP
 104extern int core_uses_pid;
 105extern char core_pattern[];
 106extern unsigned int core_pipe_limit;
 107#endif
 108extern int pid_max;
 109extern int pid_max_min, pid_max_max;
 110extern int percpu_pagelist_fraction;
 111extern int latencytop_enabled;
 112extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
 113#ifndef CONFIG_MMU
 114extern int sysctl_nr_trim_pages;
 115#endif
 116
 117/* Constants used for minimum and  maximum */
 118#ifdef CONFIG_LOCKUP_DETECTOR
 119static int sixty = 60;
 120#endif
 121
 122static int __maybe_unused neg_one = -1;
 123
 124static int zero;
 125static int __maybe_unused one = 1;
 126static int __maybe_unused two = 2;
 127static int __maybe_unused four = 4;
 128static unsigned long one_ul = 1;
 129static int one_hundred = 100;
 130static int one_thousand = 1000;
 131#ifdef CONFIG_PRINTK
 132static int ten_thousand = 10000;
 133#endif
 134#ifdef CONFIG_PERF_EVENTS
 135static int six_hundred_forty_kb = 640 * 1024;
 136#endif
 137
 138/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 139static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 140
 141/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 142static int maxolduid = 65535;
 143static int minolduid;
 144
 145static int ngroups_max = NGROUPS_MAX;
 146static const int cap_last_cap = CAP_LAST_CAP;
 147
 148/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
 149#ifdef CONFIG_DETECT_HUNG_TASK
 150static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 151#endif
 152
 153#ifdef CONFIG_INOTIFY_USER
 154#include <linux/inotify.h>
 155#endif
 156#ifdef CONFIG_SPARC
 157#endif
 158
 159#ifdef __hppa__
 160extern int pwrsw_enabled;
 161#endif
 162
 163#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 164extern int unaligned_enabled;
 165#endif
 166
 167#ifdef CONFIG_IA64
 168extern int unaligned_dump_stack;
 169#endif
 170
 171#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
 172extern int no_unaligned_warning;
 173#endif
 174
 175#ifdef CONFIG_PROC_SYSCTL
 176
 177/**
 178 * enum sysctl_writes_mode - supported sysctl write modes
 179 *
 180 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 181 *      to be written, and multiple writes on the same sysctl file descriptor
 182 *      will rewrite the sysctl value, regardless of file position. No warning
 183 *      is issued when the initial position is not 0.
 184 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 185 *      not 0.
 186 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 187 *      file position 0 and the value must be fully contained in the buffer
 188 *      sent to the write syscall. If dealing with strings respect the file
 189 *      position, but restrict this to the max length of the buffer, anything
 190 *      passed the max lenght will be ignored. Multiple writes will append
 191 *      to the buffer.
 192 *
 193 * These write modes control how current file position affects the behavior of
 194 * updating sysctl values through the proc interface on each write.
 195 */
 196enum sysctl_writes_mode {
 197        SYSCTL_WRITES_LEGACY            = -1,
 198        SYSCTL_WRITES_WARN              = 0,
 199        SYSCTL_WRITES_STRICT            = 1,
 200};
 201
 202static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 203
 204static int proc_do_cad_pid(struct ctl_table *table, int write,
 205                  void __user *buffer, size_t *lenp, loff_t *ppos);
 206static int proc_taint(struct ctl_table *table, int write,
 207                               void __user *buffer, size_t *lenp, loff_t *ppos);
 208#endif
 209
 210#ifdef CONFIG_PRINTK
 211static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 212                                void __user *buffer, size_t *lenp, loff_t *ppos);
 213#endif
 214
 215static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
 216                void __user *buffer, size_t *lenp, loff_t *ppos);
 217#ifdef CONFIG_COREDUMP
 218static int proc_dostring_coredump(struct ctl_table *table, int write,
 219                void __user *buffer, size_t *lenp, loff_t *ppos);
 220#endif
 221static int proc_dopipe_max_size(struct ctl_table *table, int write,
 222                void __user *buffer, size_t *lenp, loff_t *ppos);
 223
 224#ifdef CONFIG_MAGIC_SYSRQ
 225/* Note: sysrq code uses it's own private copy */
 226static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
 227
 228static int sysrq_sysctl_handler(struct ctl_table *table, int write,
 229                                void __user *buffer, size_t *lenp,
 230                                loff_t *ppos)
 231{
 232        int error;
 233
 234        error = proc_dointvec(table, write, buffer, lenp, ppos);
 235        if (error)
 236                return error;
 237
 238        if (write)
 239                sysrq_toggle_support(__sysrq_enabled);
 240
 241        return 0;
 242}
 243
 244#endif
 245
 246static struct ctl_table kern_table[];
 247static struct ctl_table vm_table[];
 248static struct ctl_table fs_table[];
 249static struct ctl_table debug_table[];
 250static struct ctl_table dev_table[];
 251extern struct ctl_table random_table[];
 252#ifdef CONFIG_EPOLL
 253extern struct ctl_table epoll_table[];
 254#endif
 255
 256#ifdef CONFIG_FW_LOADER_USER_HELPER
 257extern struct ctl_table firmware_config_table[];
 258#endif
 259
 260#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 261int sysctl_legacy_va_layout;
 262#endif
 263
 264/* The default sysctl tables: */
 265
 266static struct ctl_table sysctl_base_table[] = {
 267        {
 268                .procname       = "kernel",
 269                .mode           = 0555,
 270                .child          = kern_table,
 271        },
 272        {
 273                .procname       = "vm",
 274                .mode           = 0555,
 275                .child          = vm_table,
 276        },
 277        {
 278                .procname       = "fs",
 279                .mode           = 0555,
 280                .child          = fs_table,
 281        },
 282        {
 283                .procname       = "debug",
 284                .mode           = 0555,
 285                .child          = debug_table,
 286        },
 287        {
 288                .procname       = "dev",
 289                .mode           = 0555,
 290                .child          = dev_table,
 291        },
 292        { }
 293};
 294
 295#ifdef CONFIG_SCHED_DEBUG
 296static int min_sched_granularity_ns = 100000;           /* 100 usecs */
 297static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
 298static int min_wakeup_granularity_ns;                   /* 0 usecs */
 299static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
 300#ifdef CONFIG_SMP
 301static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 302static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
 303#endif /* CONFIG_SMP */
 304#endif /* CONFIG_SCHED_DEBUG */
 305
 306#ifdef CONFIG_COMPACTION
 307static int min_extfrag_threshold;
 308static int max_extfrag_threshold = 1000;
 309#endif
 310
 311static struct ctl_table kern_table[] = {
 312        {
 313                .procname       = "sched_child_runs_first",
 314                .data           = &sysctl_sched_child_runs_first,
 315                .maxlen         = sizeof(unsigned int),
 316                .mode           = 0644,
 317                .proc_handler   = proc_dointvec,
 318        },
 319#ifdef CONFIG_SCHED_DEBUG
 320        {
 321                .procname       = "sched_min_granularity_ns",
 322                .data           = &sysctl_sched_min_granularity,
 323                .maxlen         = sizeof(unsigned int),
 324                .mode           = 0644,
 325                .proc_handler   = sched_proc_update_handler,
 326                .extra1         = &min_sched_granularity_ns,
 327                .extra2         = &max_sched_granularity_ns,
 328        },
 329        {
 330                .procname       = "sched_latency_ns",
 331                .data           = &sysctl_sched_latency,
 332                .maxlen         = sizeof(unsigned int),
 333                .mode           = 0644,
 334                .proc_handler   = sched_proc_update_handler,
 335                .extra1         = &min_sched_granularity_ns,
 336                .extra2         = &max_sched_granularity_ns,
 337        },
 338        {
 339                .procname       = "sched_wakeup_granularity_ns",
 340                .data           = &sysctl_sched_wakeup_granularity,
 341                .maxlen         = sizeof(unsigned int),
 342                .mode           = 0644,
 343                .proc_handler   = sched_proc_update_handler,
 344                .extra1         = &min_wakeup_granularity_ns,
 345                .extra2         = &max_wakeup_granularity_ns,
 346        },
 347#ifdef CONFIG_SMP
 348        {
 349                .procname       = "sched_tunable_scaling",
 350                .data           = &sysctl_sched_tunable_scaling,
 351                .maxlen         = sizeof(enum sched_tunable_scaling),
 352                .mode           = 0644,
 353                .proc_handler   = sched_proc_update_handler,
 354                .extra1         = &min_sched_tunable_scaling,
 355                .extra2         = &max_sched_tunable_scaling,
 356        },
 357        {
 358                .procname       = "sched_migration_cost_ns",
 359                .data           = &sysctl_sched_migration_cost,
 360                .maxlen         = sizeof(unsigned int),
 361                .mode           = 0644,
 362                .proc_handler   = proc_dointvec,
 363        },
 364        {
 365                .procname       = "sched_nr_migrate",
 366                .data           = &sysctl_sched_nr_migrate,
 367                .maxlen         = sizeof(unsigned int),
 368                .mode           = 0644,
 369                .proc_handler   = proc_dointvec,
 370        },
 371        {
 372                .procname       = "sched_time_avg_ms",
 373                .data           = &sysctl_sched_time_avg,
 374                .maxlen         = sizeof(unsigned int),
 375                .mode           = 0644,
 376                .proc_handler   = proc_dointvec_minmax,
 377                .extra1         = &one,
 378        },
 379#ifdef CONFIG_SCHEDSTATS
 380        {
 381                .procname       = "sched_schedstats",
 382                .data           = NULL,
 383                .maxlen         = sizeof(unsigned int),
 384                .mode           = 0644,
 385                .proc_handler   = sysctl_schedstats,
 386                .extra1         = &zero,
 387                .extra2         = &one,
 388        },
 389#endif /* CONFIG_SCHEDSTATS */
 390#endif /* CONFIG_SMP */
 391#ifdef CONFIG_NUMA_BALANCING
 392        {
 393                .procname       = "numa_balancing_scan_delay_ms",
 394                .data           = &sysctl_numa_balancing_scan_delay,
 395                .maxlen         = sizeof(unsigned int),
 396                .mode           = 0644,
 397                .proc_handler   = proc_dointvec,
 398        },
 399        {
 400                .procname       = "numa_balancing_scan_period_min_ms",
 401                .data           = &sysctl_numa_balancing_scan_period_min,
 402                .maxlen         = sizeof(unsigned int),
 403                .mode           = 0644,
 404                .proc_handler   = proc_dointvec,
 405        },
 406        {
 407                .procname       = "numa_balancing_scan_period_max_ms",
 408                .data           = &sysctl_numa_balancing_scan_period_max,
 409                .maxlen         = sizeof(unsigned int),
 410                .mode           = 0644,
 411                .proc_handler   = proc_dointvec,
 412        },
 413        {
 414                .procname       = "numa_balancing_scan_size_mb",
 415                .data           = &sysctl_numa_balancing_scan_size,
 416                .maxlen         = sizeof(unsigned int),
 417                .mode           = 0644,
 418                .proc_handler   = proc_dointvec_minmax,
 419                .extra1         = &one,
 420        },
 421        {
 422                .procname       = "numa_balancing",
 423                .data           = NULL, /* filled in by handler */
 424                .maxlen         = sizeof(unsigned int),
 425                .mode           = 0644,
 426                .proc_handler   = sysctl_numa_balancing,
 427                .extra1         = &zero,
 428                .extra2         = &one,
 429        },
 430#endif /* CONFIG_NUMA_BALANCING */
 431#endif /* CONFIG_SCHED_DEBUG */
 432        {
 433                .procname       = "sched_rt_period_us",
 434                .data           = &sysctl_sched_rt_period,
 435                .maxlen         = sizeof(unsigned int),
 436                .mode           = 0644,
 437                .proc_handler   = sched_rt_handler,
 438        },
 439        {
 440                .procname       = "sched_rt_runtime_us",
 441                .data           = &sysctl_sched_rt_runtime,
 442                .maxlen         = sizeof(int),
 443                .mode           = 0644,
 444                .proc_handler   = sched_rt_handler,
 445        },
 446        {
 447                .procname       = "sched_rr_timeslice_ms",
 448                .data           = &sysctl_sched_rr_timeslice,
 449                .maxlen         = sizeof(int),
 450                .mode           = 0644,
 451                .proc_handler   = sched_rr_handler,
 452        },
 453#ifdef CONFIG_SCHED_AUTOGROUP
 454        {
 455                .procname       = "sched_autogroup_enabled",
 456                .data           = &sysctl_sched_autogroup_enabled,
 457                .maxlen         = sizeof(unsigned int),
 458                .mode           = 0644,
 459                .proc_handler   = proc_dointvec_minmax,
 460                .extra1         = &zero,
 461                .extra2         = &one,
 462        },
 463#endif
 464#ifdef CONFIG_CFS_BANDWIDTH
 465        {
 466                .procname       = "sched_cfs_bandwidth_slice_us",
 467                .data           = &sysctl_sched_cfs_bandwidth_slice,
 468                .maxlen         = sizeof(unsigned int),
 469                .mode           = 0644,
 470                .proc_handler   = proc_dointvec_minmax,
 471                .extra1         = &one,
 472        },
 473#endif
 474#ifdef CONFIG_PROVE_LOCKING
 475        {
 476                .procname       = "prove_locking",
 477                .data           = &prove_locking,
 478                .maxlen         = sizeof(int),
 479                .mode           = 0644,
 480                .proc_handler   = proc_dointvec,
 481        },
 482#endif
 483#ifdef CONFIG_LOCK_STAT
 484        {
 485                .procname       = "lock_stat",
 486                .data           = &lock_stat,
 487                .maxlen         = sizeof(int),
 488                .mode           = 0644,
 489                .proc_handler   = proc_dointvec,
 490        },
 491#endif
 492        {
 493                .procname       = "panic",
 494                .data           = &panic_timeout,
 495                .maxlen         = sizeof(int),
 496                .mode           = 0644,
 497                .proc_handler   = proc_dointvec,
 498        },
 499#ifdef CONFIG_COREDUMP
 500        {
 501                .procname       = "core_uses_pid",
 502                .data           = &core_uses_pid,
 503                .maxlen         = sizeof(int),
 504                .mode           = 0644,
 505                .proc_handler   = proc_dointvec,
 506        },
 507        {
 508                .procname       = "core_pattern",
 509                .data           = core_pattern,
 510                .maxlen         = CORENAME_MAX_SIZE,
 511                .mode           = 0644,
 512                .proc_handler   = proc_dostring_coredump,
 513        },
 514        {
 515                .procname       = "core_pipe_limit",
 516                .data           = &core_pipe_limit,
 517                .maxlen         = sizeof(unsigned int),
 518                .mode           = 0644,
 519                .proc_handler   = proc_dointvec,
 520        },
 521#endif
 522#ifdef CONFIG_PROC_SYSCTL
 523        {
 524                .procname       = "tainted",
 525                .maxlen         = sizeof(long),
 526                .mode           = 0644,
 527                .proc_handler   = proc_taint,
 528        },
 529        {
 530                .procname       = "sysctl_writes_strict",
 531                .data           = &sysctl_writes_strict,
 532                .maxlen         = sizeof(int),
 533                .mode           = 0644,
 534                .proc_handler   = proc_dointvec_minmax,
 535                .extra1         = &neg_one,
 536                .extra2         = &one,
 537        },
 538#endif
 539#ifdef CONFIG_LATENCYTOP
 540        {
 541                .procname       = "latencytop",
 542                .data           = &latencytop_enabled,
 543                .maxlen         = sizeof(int),
 544                .mode           = 0644,
 545                .proc_handler   = sysctl_latencytop,
 546        },
 547#endif
 548#ifdef CONFIG_BLK_DEV_INITRD
 549        {
 550                .procname       = "real-root-dev",
 551                .data           = &real_root_dev,
 552                .maxlen         = sizeof(int),
 553                .mode           = 0644,
 554                .proc_handler   = proc_dointvec,
 555        },
 556#endif
 557        {
 558                .procname       = "print-fatal-signals",
 559                .data           = &print_fatal_signals,
 560                .maxlen         = sizeof(int),
 561                .mode           = 0644,
 562                .proc_handler   = proc_dointvec,
 563        },
 564#ifdef CONFIG_SPARC
 565        {
 566                .procname       = "reboot-cmd",
 567                .data           = reboot_command,
 568                .maxlen         = 256,
 569                .mode           = 0644,
 570                .proc_handler   = proc_dostring,
 571        },
 572        {
 573                .procname       = "stop-a",
 574                .data           = &stop_a_enabled,
 575                .maxlen         = sizeof (int),
 576                .mode           = 0644,
 577                .proc_handler   = proc_dointvec,
 578        },
 579        {
 580                .procname       = "scons-poweroff",
 581                .data           = &scons_pwroff,
 582                .maxlen         = sizeof (int),
 583                .mode           = 0644,
 584                .proc_handler   = proc_dointvec,
 585        },
 586#endif
 587#ifdef CONFIG_SPARC64
 588        {
 589                .procname       = "tsb-ratio",
 590                .data           = &sysctl_tsb_ratio,
 591                .maxlen         = sizeof (int),
 592                .mode           = 0644,
 593                .proc_handler   = proc_dointvec,
 594        },
 595#endif
 596#ifdef __hppa__
 597        {
 598                .procname       = "soft-power",
 599                .data           = &pwrsw_enabled,
 600                .maxlen         = sizeof (int),
 601                .mode           = 0644,
 602                .proc_handler   = proc_dointvec,
 603        },
 604#endif
 605#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
 606        {
 607                .procname       = "unaligned-trap",
 608                .data           = &unaligned_enabled,
 609                .maxlen         = sizeof (int),
 610                .mode           = 0644,
 611                .proc_handler   = proc_dointvec,
 612        },
 613#endif
 614        {
 615                .procname       = "ctrl-alt-del",
 616                .data           = &C_A_D,
 617                .maxlen         = sizeof(int),
 618                .mode           = 0644,
 619                .proc_handler   = proc_dointvec,
 620        },
 621#ifdef CONFIG_FUNCTION_TRACER
 622        {
 623                .procname       = "ftrace_enabled",
 624                .data           = &ftrace_enabled,
 625                .maxlen         = sizeof(int),
 626                .mode           = 0644,
 627                .proc_handler   = ftrace_enable_sysctl,
 628        },
 629#endif
 630#ifdef CONFIG_STACK_TRACER
 631        {
 632                .procname       = "stack_tracer_enabled",
 633                .data           = &stack_tracer_enabled,
 634                .maxlen         = sizeof(int),
 635                .mode           = 0644,
 636                .proc_handler   = stack_trace_sysctl,
 637        },
 638#endif
 639#ifdef CONFIG_TRACING
 640        {
 641                .procname       = "ftrace_dump_on_oops",
 642                .data           = &ftrace_dump_on_oops,
 643                .maxlen         = sizeof(int),
 644                .mode           = 0644,
 645                .proc_handler   = proc_dointvec,
 646        },
 647        {
 648                .procname       = "traceoff_on_warning",
 649                .data           = &__disable_trace_on_warning,
 650                .maxlen         = sizeof(__disable_trace_on_warning),
 651                .mode           = 0644,
 652                .proc_handler   = proc_dointvec,
 653        },
 654        {
 655                .procname       = "tracepoint_printk",
 656                .data           = &tracepoint_printk,
 657                .maxlen         = sizeof(tracepoint_printk),
 658                .mode           = 0644,
 659                .proc_handler   = tracepoint_printk_sysctl,
 660        },
 661#endif
 662#ifdef CONFIG_KEXEC_CORE
 663        {
 664                .procname       = "kexec_load_disabled",
 665                .data           = &kexec_load_disabled,
 666                .maxlen         = sizeof(int),
 667                .mode           = 0644,
 668                /* only handle a transition from default "0" to "1" */
 669                .proc_handler   = proc_dointvec_minmax,
 670                .extra1         = &one,
 671                .extra2         = &one,
 672        },
 673#endif
 674#ifdef CONFIG_MODULES
 675        {
 676                .procname       = "modprobe",
 677                .data           = &modprobe_path,
 678                .maxlen         = KMOD_PATH_LEN,
 679                .mode           = 0644,
 680                .proc_handler   = proc_dostring,
 681        },
 682        {
 683                .procname       = "modules_disabled",
 684                .data           = &modules_disabled,
 685                .maxlen         = sizeof(int),
 686                .mode           = 0644,
 687                /* only handle a transition from default "0" to "1" */
 688                .proc_handler   = proc_dointvec_minmax,
 689                .extra1         = &one,
 690                .extra2         = &one,
 691        },
 692#endif
 693#ifdef CONFIG_UEVENT_HELPER
 694        {
 695                .procname       = "hotplug",
 696                .data           = &uevent_helper,
 697                .maxlen         = UEVENT_HELPER_PATH_LEN,
 698                .mode           = 0644,
 699                .proc_handler   = proc_dostring,
 700        },
 701#endif
 702#ifdef CONFIG_CHR_DEV_SG
 703        {
 704                .procname       = "sg-big-buff",
 705                .data           = &sg_big_buff,
 706                .maxlen         = sizeof (int),
 707                .mode           = 0444,
 708                .proc_handler   = proc_dointvec,
 709        },
 710#endif
 711#ifdef CONFIG_BSD_PROCESS_ACCT
 712        {
 713                .procname       = "acct",
 714                .data           = &acct_parm,
 715                .maxlen         = 3*sizeof(int),
 716                .mode           = 0644,
 717                .proc_handler   = proc_dointvec,
 718        },
 719#endif
 720#ifdef CONFIG_MAGIC_SYSRQ
 721        {
 722                .procname       = "sysrq",
 723                .data           = &__sysrq_enabled,
 724                .maxlen         = sizeof (int),
 725                .mode           = 0644,
 726                .proc_handler   = sysrq_sysctl_handler,
 727        },
 728#endif
 729#ifdef CONFIG_PROC_SYSCTL
 730        {
 731                .procname       = "cad_pid",
 732                .data           = NULL,
 733                .maxlen         = sizeof (int),
 734                .mode           = 0600,
 735                .proc_handler   = proc_do_cad_pid,
 736        },
 737#endif
 738        {
 739                .procname       = "threads-max",
 740                .data           = NULL,
 741                .maxlen         = sizeof(int),
 742                .mode           = 0644,
 743                .proc_handler   = sysctl_max_threads,
 744        },
 745        {
 746                .procname       = "random",
 747                .mode           = 0555,
 748                .child          = random_table,
 749        },
 750        {
 751                .procname       = "usermodehelper",
 752                .mode           = 0555,
 753                .child          = usermodehelper_table,
 754        },
 755#ifdef CONFIG_FW_LOADER_USER_HELPER
 756        {
 757                .procname       = "firmware_config",
 758                .mode           = 0555,
 759                .child          = firmware_config_table,
 760        },
 761#endif
 762        {
 763                .procname       = "overflowuid",
 764                .data           = &overflowuid,
 765                .maxlen         = sizeof(int),
 766                .mode           = 0644,
 767                .proc_handler   = proc_dointvec_minmax,
 768                .extra1         = &minolduid,
 769                .extra2         = &maxolduid,
 770        },
 771        {
 772                .procname       = "overflowgid",
 773                .data           = &overflowgid,
 774                .maxlen         = sizeof(int),
 775                .mode           = 0644,
 776                .proc_handler   = proc_dointvec_minmax,
 777                .extra1         = &minolduid,
 778                .extra2         = &maxolduid,
 779        },
 780#ifdef CONFIG_S390
 781#ifdef CONFIG_MATHEMU
 782        {
 783                .procname       = "ieee_emulation_warnings",
 784                .data           = &sysctl_ieee_emulation_warnings,
 785                .maxlen         = sizeof(int),
 786                .mode           = 0644,
 787                .proc_handler   = proc_dointvec,
 788        },
 789#endif
 790        {
 791                .procname       = "userprocess_debug",
 792                .data           = &show_unhandled_signals,
 793                .maxlen         = sizeof(int),
 794                .mode           = 0644,
 795                .proc_handler   = proc_dointvec,
 796        },
 797#endif
 798        {
 799                .procname       = "pid_max",
 800                .data           = &pid_max,
 801                .maxlen         = sizeof (int),
 802                .mode           = 0644,
 803                .proc_handler   = proc_dointvec_minmax,
 804                .extra1         = &pid_max_min,
 805                .extra2         = &pid_max_max,
 806        },
 807        {
 808                .procname       = "panic_on_oops",
 809                .data           = &panic_on_oops,
 810                .maxlen         = sizeof(int),
 811                .mode           = 0644,
 812                .proc_handler   = proc_dointvec,
 813        },
 814#if defined CONFIG_PRINTK
 815        {
 816                .procname       = "printk",
 817                .data           = &console_loglevel,
 818                .maxlen         = 4*sizeof(int),
 819                .mode           = 0644,
 820                .proc_handler   = proc_dointvec,
 821        },
 822        {
 823                .procname       = "printk_ratelimit",
 824                .data           = &printk_ratelimit_state.interval,
 825                .maxlen         = sizeof(int),
 826                .mode           = 0644,
 827                .proc_handler   = proc_dointvec_jiffies,
 828        },
 829        {
 830                .procname       = "printk_ratelimit_burst",
 831                .data           = &printk_ratelimit_state.burst,
 832                .maxlen         = sizeof(int),
 833                .mode           = 0644,
 834                .proc_handler   = proc_dointvec,
 835        },
 836        {
 837                .procname       = "printk_delay",
 838                .data           = &printk_delay_msec,
 839                .maxlen         = sizeof(int),
 840                .mode           = 0644,
 841                .proc_handler   = proc_dointvec_minmax,
 842                .extra1         = &zero,
 843                .extra2         = &ten_thousand,
 844        },
 845        {
 846                .procname       = "printk_devkmsg",
 847                .data           = devkmsg_log_str,
 848                .maxlen         = DEVKMSG_STR_MAX_SIZE,
 849                .mode           = 0644,
 850                .proc_handler   = devkmsg_sysctl_set_loglvl,
 851        },
 852        {
 853                .procname       = "dmesg_restrict",
 854                .data           = &dmesg_restrict,
 855                .maxlen         = sizeof(int),
 856                .mode           = 0644,
 857                .proc_handler   = proc_dointvec_minmax_sysadmin,
 858                .extra1         = &zero,
 859                .extra2         = &one,
 860        },
 861        {
 862                .procname       = "kptr_restrict",
 863                .data           = &kptr_restrict,
 864                .maxlen         = sizeof(int),
 865                .mode           = 0644,
 866                .proc_handler   = proc_dointvec_minmax_sysadmin,
 867                .extra1         = &zero,
 868                .extra2         = &two,
 869        },
 870#endif
 871        {
 872                .procname       = "ngroups_max",
 873                .data           = &ngroups_max,
 874                .maxlen         = sizeof (int),
 875                .mode           = 0444,
 876                .proc_handler   = proc_dointvec,
 877        },
 878        {
 879                .procname       = "cap_last_cap",
 880                .data           = (void *)&cap_last_cap,
 881                .maxlen         = sizeof(int),
 882                .mode           = 0444,
 883                .proc_handler   = proc_dointvec,
 884        },
 885#if defined(CONFIG_LOCKUP_DETECTOR)
 886        {
 887                .procname       = "watchdog",
 888                .data           = &watchdog_user_enabled,
 889                .maxlen         = sizeof(int),
 890                .mode           = 0644,
 891                .proc_handler   = proc_watchdog,
 892                .extra1         = &zero,
 893                .extra2         = &one,
 894        },
 895        {
 896                .procname       = "watchdog_thresh",
 897                .data           = &watchdog_thresh,
 898                .maxlen         = sizeof(int),
 899                .mode           = 0644,
 900                .proc_handler   = proc_watchdog_thresh,
 901                .extra1         = &zero,
 902                .extra2         = &sixty,
 903        },
 904        {
 905                .procname       = "nmi_watchdog",
 906                .data           = &nmi_watchdog_user_enabled,
 907                .maxlen         = sizeof(int),
 908                .mode           = NMI_WATCHDOG_SYSCTL_PERM,
 909                .proc_handler   = proc_nmi_watchdog,
 910                .extra1         = &zero,
 911                .extra2         = &one,
 912        },
 913        {
 914                .procname       = "watchdog_cpumask",
 915                .data           = &watchdog_cpumask_bits,
 916                .maxlen         = NR_CPUS,
 917                .mode           = 0644,
 918                .proc_handler   = proc_watchdog_cpumask,
 919        },
 920#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 921        {
 922                .procname       = "soft_watchdog",
 923                .data           = &soft_watchdog_user_enabled,
 924                .maxlen         = sizeof(int),
 925                .mode           = 0644,
 926                .proc_handler   = proc_soft_watchdog,
 927                .extra1         = &zero,
 928                .extra2         = &one,
 929        },
 930        {
 931                .procname       = "softlockup_panic",
 932                .data           = &softlockup_panic,
 933                .maxlen         = sizeof(int),
 934                .mode           = 0644,
 935                .proc_handler   = proc_dointvec_minmax,
 936                .extra1         = &zero,
 937                .extra2         = &one,
 938        },
 939#ifdef CONFIG_SMP
 940        {
 941                .procname       = "softlockup_all_cpu_backtrace",
 942                .data           = &sysctl_softlockup_all_cpu_backtrace,
 943                .maxlen         = sizeof(int),
 944                .mode           = 0644,
 945                .proc_handler   = proc_dointvec_minmax,
 946                .extra1         = &zero,
 947                .extra2         = &one,
 948        },
 949#endif /* CONFIG_SMP */
 950#endif
 951#ifdef CONFIG_HARDLOCKUP_DETECTOR
 952        {
 953                .procname       = "hardlockup_panic",
 954                .data           = &hardlockup_panic,
 955                .maxlen         = sizeof(int),
 956                .mode           = 0644,
 957                .proc_handler   = proc_dointvec_minmax,
 958                .extra1         = &zero,
 959                .extra2         = &one,
 960        },
 961#ifdef CONFIG_SMP
 962        {
 963                .procname       = "hardlockup_all_cpu_backtrace",
 964                .data           = &sysctl_hardlockup_all_cpu_backtrace,
 965                .maxlen         = sizeof(int),
 966                .mode           = 0644,
 967                .proc_handler   = proc_dointvec_minmax,
 968                .extra1         = &zero,
 969                .extra2         = &one,
 970        },
 971#endif /* CONFIG_SMP */
 972#endif
 973#endif
 974
 975#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 976        {
 977                .procname       = "unknown_nmi_panic",
 978                .data           = &unknown_nmi_panic,
 979                .maxlen         = sizeof (int),
 980                .mode           = 0644,
 981                .proc_handler   = proc_dointvec,
 982        },
 983#endif
 984#if defined(CONFIG_X86)
 985        {
 986                .procname       = "panic_on_unrecovered_nmi",
 987                .data           = &panic_on_unrecovered_nmi,
 988                .maxlen         = sizeof(int),
 989                .mode           = 0644,
 990                .proc_handler   = proc_dointvec,
 991        },
 992        {
 993                .procname       = "panic_on_io_nmi",
 994                .data           = &panic_on_io_nmi,
 995                .maxlen         = sizeof(int),
 996                .mode           = 0644,
 997                .proc_handler   = proc_dointvec,
 998        },
 999#ifdef CONFIG_DEBUG_STACKOVERFLOW
1000        {
1001                .procname       = "panic_on_stackoverflow",
1002                .data           = &sysctl_panic_on_stackoverflow,
1003                .maxlen         = sizeof(int),
1004                .mode           = 0644,
1005                .proc_handler   = proc_dointvec,
1006        },
1007#endif
1008        {
1009                .procname       = "bootloader_type",
1010                .data           = &bootloader_type,
1011                .maxlen         = sizeof (int),
1012                .mode           = 0444,
1013                .proc_handler   = proc_dointvec,
1014        },
1015        {
1016                .procname       = "bootloader_version",
1017                .data           = &bootloader_version,
1018                .maxlen         = sizeof (int),
1019                .mode           = 0444,
1020                .proc_handler   = proc_dointvec,
1021        },
1022        {
1023                .procname       = "io_delay_type",
1024                .data           = &io_delay_type,
1025                .maxlen         = sizeof(int),
1026                .mode           = 0644,
1027                .proc_handler   = proc_dointvec,
1028        },
1029#endif
1030#if defined(CONFIG_MMU)
1031        {
1032                .procname       = "randomize_va_space",
1033                .data           = &randomize_va_space,
1034                .maxlen         = sizeof(int),
1035                .mode           = 0644,
1036                .proc_handler   = proc_dointvec,
1037        },
1038#endif
1039#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1040        {
1041                .procname       = "spin_retry",
1042                .data           = &spin_retry,
1043                .maxlen         = sizeof (int),
1044                .mode           = 0644,
1045                .proc_handler   = proc_dointvec,
1046        },
1047#endif
1048#if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1049        {
1050                .procname       = "acpi_video_flags",
1051                .data           = &acpi_realmode_flags,
1052                .maxlen         = sizeof (unsigned long),
1053                .mode           = 0644,
1054                .proc_handler   = proc_doulongvec_minmax,
1055        },
1056#endif
1057#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1058        {
1059                .procname       = "ignore-unaligned-usertrap",
1060                .data           = &no_unaligned_warning,
1061                .maxlen         = sizeof (int),
1062                .mode           = 0644,
1063                .proc_handler   = proc_dointvec,
1064        },
1065#endif
1066#ifdef CONFIG_IA64
1067        {
1068                .procname       = "unaligned-dump-stack",
1069                .data           = &unaligned_dump_stack,
1070                .maxlen         = sizeof (int),
1071                .mode           = 0644,
1072                .proc_handler   = proc_dointvec,
1073        },
1074#endif
1075#ifdef CONFIG_DETECT_HUNG_TASK
1076        {
1077                .procname       = "hung_task_panic",
1078                .data           = &sysctl_hung_task_panic,
1079                .maxlen         = sizeof(int),
1080                .mode           = 0644,
1081                .proc_handler   = proc_dointvec_minmax,
1082                .extra1         = &zero,
1083                .extra2         = &one,
1084        },
1085        {
1086                .procname       = "hung_task_check_count",
1087                .data           = &sysctl_hung_task_check_count,
1088                .maxlen         = sizeof(int),
1089                .mode           = 0644,
1090                .proc_handler   = proc_dointvec_minmax,
1091                .extra1         = &zero,
1092        },
1093        {
1094                .procname       = "hung_task_timeout_secs",
1095                .data           = &sysctl_hung_task_timeout_secs,
1096                .maxlen         = sizeof(unsigned long),
1097                .mode           = 0644,
1098                .proc_handler   = proc_dohung_task_timeout_secs,
1099                .extra2         = &hung_task_timeout_max,
1100        },
1101        {
1102                .procname       = "hung_task_warnings",
1103                .data           = &sysctl_hung_task_warnings,
1104                .maxlen         = sizeof(int),
1105                .mode           = 0644,
1106                .proc_handler   = proc_dointvec_minmax,
1107                .extra1         = &neg_one,
1108        },
1109#endif
1110#ifdef CONFIG_RT_MUTEXES
1111        {
1112                .procname       = "max_lock_depth",
1113                .data           = &max_lock_depth,
1114                .maxlen         = sizeof(int),
1115                .mode           = 0644,
1116                .proc_handler   = proc_dointvec,
1117        },
1118#endif
1119        {
1120                .procname       = "poweroff_cmd",
1121                .data           = &poweroff_cmd,
1122                .maxlen         = POWEROFF_CMD_PATH_LEN,
1123                .mode           = 0644,
1124                .proc_handler   = proc_dostring,
1125        },
1126#ifdef CONFIG_KEYS
1127        {
1128                .procname       = "keys",
1129                .mode           = 0555,
1130                .child          = key_sysctls,
1131        },
1132#endif
1133#ifdef CONFIG_PERF_EVENTS
1134        /*
1135         * User-space scripts rely on the existence of this file
1136         * as a feature check for perf_events being enabled.
1137         *
1138         * So it's an ABI, do not remove!
1139         */
1140        {
1141                .procname       = "perf_event_paranoid",
1142                .data           = &sysctl_perf_event_paranoid,
1143                .maxlen         = sizeof(sysctl_perf_event_paranoid),
1144                .mode           = 0644,
1145                .proc_handler   = proc_dointvec,
1146        },
1147        {
1148                .procname       = "perf_event_mlock_kb",
1149                .data           = &sysctl_perf_event_mlock,
1150                .maxlen         = sizeof(sysctl_perf_event_mlock),
1151                .mode           = 0644,
1152                .proc_handler   = proc_dointvec,
1153        },
1154        {
1155                .procname       = "perf_event_max_sample_rate",
1156                .data           = &sysctl_perf_event_sample_rate,
1157                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1158                .mode           = 0644,
1159                .proc_handler   = perf_proc_update_handler,
1160                .extra1         = &one,
1161        },
1162        {
1163                .procname       = "perf_cpu_time_max_percent",
1164                .data           = &sysctl_perf_cpu_time_max_percent,
1165                .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1166                .mode           = 0644,
1167                .proc_handler   = perf_cpu_time_max_percent_handler,
1168                .extra1         = &zero,
1169                .extra2         = &one_hundred,
1170        },
1171        {
1172                .procname       = "perf_event_max_stack",
1173                .data           = &sysctl_perf_event_max_stack,
1174                .maxlen         = sizeof(sysctl_perf_event_max_stack),
1175                .mode           = 0644,
1176                .proc_handler   = perf_event_max_stack_handler,
1177                .extra1         = &zero,
1178                .extra2         = &six_hundred_forty_kb,
1179        },
1180        {
1181                .procname       = "perf_event_max_contexts_per_stack",
1182                .data           = &sysctl_perf_event_max_contexts_per_stack,
1183                .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1184                .mode           = 0644,
1185                .proc_handler   = perf_event_max_stack_handler,
1186                .extra1         = &zero,
1187                .extra2         = &one_thousand,
1188        },
1189#endif
1190        {
1191                .procname       = "panic_on_warn",
1192                .data           = &panic_on_warn,
1193                .maxlen         = sizeof(int),
1194                .mode           = 0644,
1195                .proc_handler   = proc_dointvec_minmax,
1196                .extra1         = &zero,
1197                .extra2         = &one,
1198        },
1199#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1200        {
1201                .procname       = "timer_migration",
1202                .data           = &sysctl_timer_migration,
1203                .maxlen         = sizeof(unsigned int),
1204                .mode           = 0644,
1205                .proc_handler   = timer_migration_handler,
1206                .extra1         = &zero,
1207                .extra2         = &one,
1208        },
1209#endif
1210#ifdef CONFIG_BPF_SYSCALL
1211        {
1212                .procname       = "unprivileged_bpf_disabled",
1213                .data           = &sysctl_unprivileged_bpf_disabled,
1214                .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1215                .mode           = 0644,
1216                /* only handle a transition from default "0" to "1" */
1217                .proc_handler   = proc_dointvec_minmax,
1218                .extra1         = &one,
1219                .extra2         = &one,
1220        },
1221#endif
1222#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1223        {
1224                .procname       = "panic_on_rcu_stall",
1225                .data           = &sysctl_panic_on_rcu_stall,
1226                .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1227                .mode           = 0644,
1228                .proc_handler   = proc_dointvec_minmax,
1229                .extra1         = &zero,
1230                .extra2         = &one,
1231        },
1232#endif
1233        { }
1234};
1235
1236static struct ctl_table vm_table[] = {
1237        {
1238                .procname       = "overcommit_memory",
1239                .data           = &sysctl_overcommit_memory,
1240                .maxlen         = sizeof(sysctl_overcommit_memory),
1241                .mode           = 0644,
1242                .proc_handler   = proc_dointvec_minmax,
1243                .extra1         = &zero,
1244                .extra2         = &two,
1245        },
1246        {
1247                .procname       = "panic_on_oom",
1248                .data           = &sysctl_panic_on_oom,
1249                .maxlen         = sizeof(sysctl_panic_on_oom),
1250                .mode           = 0644,
1251                .proc_handler   = proc_dointvec_minmax,
1252                .extra1         = &zero,
1253                .extra2         = &two,
1254        },
1255        {
1256                .procname       = "oom_kill_allocating_task",
1257                .data           = &sysctl_oom_kill_allocating_task,
1258                .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1259                .mode           = 0644,
1260                .proc_handler   = proc_dointvec,
1261        },
1262        {
1263                .procname       = "oom_dump_tasks",
1264                .data           = &sysctl_oom_dump_tasks,
1265                .maxlen         = sizeof(sysctl_oom_dump_tasks),
1266                .mode           = 0644,
1267                .proc_handler   = proc_dointvec,
1268        },
1269        {
1270                .procname       = "overcommit_ratio",
1271                .data           = &sysctl_overcommit_ratio,
1272                .maxlen         = sizeof(sysctl_overcommit_ratio),
1273                .mode           = 0644,
1274                .proc_handler   = overcommit_ratio_handler,
1275        },
1276        {
1277                .procname       = "overcommit_kbytes",
1278                .data           = &sysctl_overcommit_kbytes,
1279                .maxlen         = sizeof(sysctl_overcommit_kbytes),
1280                .mode           = 0644,
1281                .proc_handler   = overcommit_kbytes_handler,
1282        },
1283        {
1284                .procname       = "page-cluster", 
1285                .data           = &page_cluster,
1286                .maxlen         = sizeof(int),
1287                .mode           = 0644,
1288                .proc_handler   = proc_dointvec_minmax,
1289                .extra1         = &zero,
1290        },
1291        {
1292                .procname       = "dirty_background_ratio",
1293                .data           = &dirty_background_ratio,
1294                .maxlen         = sizeof(dirty_background_ratio),
1295                .mode           = 0644,
1296                .proc_handler   = dirty_background_ratio_handler,
1297                .extra1         = &zero,
1298                .extra2         = &one_hundred,
1299        },
1300        {
1301                .procname       = "dirty_background_bytes",
1302                .data           = &dirty_background_bytes,
1303                .maxlen         = sizeof(dirty_background_bytes),
1304                .mode           = 0644,
1305                .proc_handler   = dirty_background_bytes_handler,
1306                .extra1         = &one_ul,
1307        },
1308        {
1309                .procname       = "dirty_ratio",
1310                .data           = &vm_dirty_ratio,
1311                .maxlen         = sizeof(vm_dirty_ratio),
1312                .mode           = 0644,
1313                .proc_handler   = dirty_ratio_handler,
1314                .extra1         = &zero,
1315                .extra2         = &one_hundred,
1316        },
1317        {
1318                .procname       = "dirty_bytes",
1319                .data           = &vm_dirty_bytes,
1320                .maxlen         = sizeof(vm_dirty_bytes),
1321                .mode           = 0644,
1322                .proc_handler   = dirty_bytes_handler,
1323                .extra1         = &dirty_bytes_min,
1324        },
1325        {
1326                .procname       = "dirty_writeback_centisecs",
1327                .data           = &dirty_writeback_interval,
1328                .maxlen         = sizeof(dirty_writeback_interval),
1329                .mode           = 0644,
1330                .proc_handler   = dirty_writeback_centisecs_handler,
1331        },
1332        {
1333                .procname       = "dirty_expire_centisecs",
1334                .data           = &dirty_expire_interval,
1335                .maxlen         = sizeof(dirty_expire_interval),
1336                .mode           = 0644,
1337                .proc_handler   = proc_dointvec_minmax,
1338                .extra1         = &zero,
1339        },
1340        {
1341                .procname       = "dirtytime_expire_seconds",
1342                .data           = &dirtytime_expire_interval,
1343                .maxlen         = sizeof(dirtytime_expire_interval),
1344                .mode           = 0644,
1345                .proc_handler   = dirtytime_interval_handler,
1346                .extra1         = &zero,
1347        },
1348        {
1349                .procname       = "swappiness",
1350                .data           = &vm_swappiness,
1351                .maxlen         = sizeof(vm_swappiness),
1352                .mode           = 0644,
1353                .proc_handler   = proc_dointvec_minmax,
1354                .extra1         = &zero,
1355                .extra2         = &one_hundred,
1356        },
1357#ifdef CONFIG_HUGETLB_PAGE
1358        {
1359                .procname       = "nr_hugepages",
1360                .data           = NULL,
1361                .maxlen         = sizeof(unsigned long),
1362                .mode           = 0644,
1363                .proc_handler   = hugetlb_sysctl_handler,
1364        },
1365#ifdef CONFIG_NUMA
1366        {
1367                .procname       = "nr_hugepages_mempolicy",
1368                .data           = NULL,
1369                .maxlen         = sizeof(unsigned long),
1370                .mode           = 0644,
1371                .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1372        },
1373        {
1374                .procname               = "numa_stat",
1375                .data                   = &sysctl_vm_numa_stat,
1376                .maxlen                 = sizeof(int),
1377                .mode                   = 0644,
1378                .proc_handler   = sysctl_vm_numa_stat_handler,
1379                .extra1                 = &zero,
1380                .extra2                 = &one,
1381        },
1382#endif
1383         {
1384                .procname       = "hugetlb_shm_group",
1385                .data           = &sysctl_hugetlb_shm_group,
1386                .maxlen         = sizeof(gid_t),
1387                .mode           = 0644,
1388                .proc_handler   = proc_dointvec,
1389         },
1390        {
1391                .procname       = "nr_overcommit_hugepages",
1392                .data           = NULL,
1393                .maxlen         = sizeof(unsigned long),
1394                .mode           = 0644,
1395                .proc_handler   = hugetlb_overcommit_handler,
1396        },
1397#endif
1398        {
1399                .procname       = "lowmem_reserve_ratio",
1400                .data           = &sysctl_lowmem_reserve_ratio,
1401                .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1402                .mode           = 0644,
1403                .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1404        },
1405        {
1406                .procname       = "drop_caches",
1407                .data           = &sysctl_drop_caches,
1408                .maxlen         = sizeof(int),
1409                .mode           = 0644,
1410                .proc_handler   = drop_caches_sysctl_handler,
1411                .extra1         = &one,
1412                .extra2         = &four,
1413        },
1414#ifdef CONFIG_COMPACTION
1415        {
1416                .procname       = "compact_memory",
1417                .data           = &sysctl_compact_memory,
1418                .maxlen         = sizeof(int),
1419                .mode           = 0200,
1420                .proc_handler   = sysctl_compaction_handler,
1421        },
1422        {
1423                .procname       = "extfrag_threshold",
1424                .data           = &sysctl_extfrag_threshold,
1425                .maxlen         = sizeof(int),
1426                .mode           = 0644,
1427                .proc_handler   = sysctl_extfrag_handler,
1428                .extra1         = &min_extfrag_threshold,
1429                .extra2         = &max_extfrag_threshold,
1430        },
1431        {
1432                .procname       = "compact_unevictable_allowed",
1433                .data           = &sysctl_compact_unevictable_allowed,
1434                .maxlen         = sizeof(int),
1435                .mode           = 0644,
1436                .proc_handler   = proc_dointvec_minmax, /* plain proc_dointvec never checks extra1/extra2 */
1437                .extra1         = &zero,        /* lower bound checked by the handler */
1438                .extra2         = &one,         /* upper bound checked by the handler */
1439        },
1440
1441#endif /* CONFIG_COMPACTION */
1442        {
1443                .procname       = "min_free_kbytes",
1444                .data           = &min_free_kbytes,
1445                .maxlen         = sizeof(min_free_kbytes),
1446                .mode           = 0644,
1447                .proc_handler   = min_free_kbytes_sysctl_handler,
1448                .extra1         = &zero,
1449        },
1450        {
1451                .procname       = "watermark_scale_factor",
1452                .data           = &watermark_scale_factor,
1453                .maxlen         = sizeof(watermark_scale_factor),
1454                .mode           = 0644,
1455                .proc_handler   = watermark_scale_factor_sysctl_handler,
1456                .extra1         = &one,
1457                .extra2         = &one_thousand,
1458        },
1459        {
1460                .procname       = "percpu_pagelist_fraction",
1461                .data           = &percpu_pagelist_fraction,
1462                .maxlen         = sizeof(percpu_pagelist_fraction),
1463                .mode           = 0644,
1464                .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1465                .extra1         = &zero,
1466        },
1467#ifdef CONFIG_MMU
1468        {
1469                .procname       = "max_map_count",
1470                .data           = &sysctl_max_map_count,
1471                .maxlen         = sizeof(sysctl_max_map_count),
1472                .mode           = 0644,
1473                .proc_handler   = proc_dointvec_minmax,
1474                .extra1         = &zero,
1475        },
1476#else
1477        {
1478                .procname       = "nr_trim_pages",
1479                .data           = &sysctl_nr_trim_pages,
1480                .maxlen         = sizeof(sysctl_nr_trim_pages),
1481                .mode           = 0644,
1482                .proc_handler   = proc_dointvec_minmax,
1483                .extra1         = &zero,
1484        },
1485#endif
1486        {
1487                .procname       = "laptop_mode",
1488                .data           = &laptop_mode,
1489                .maxlen         = sizeof(laptop_mode),
1490                .mode           = 0644,
1491                .proc_handler   = proc_dointvec_jiffies,
1492        },
1493        {
1494                .procname       = "block_dump",
1495                .data           = &block_dump,
1496                .maxlen         = sizeof(block_dump),
1497                .mode           = 0644,
1498                .proc_handler   = proc_dointvec_minmax, /* plain proc_dointvec never checks extra1 */
1499                .extra1         = &zero,        /* lower bound checked by the handler */
1500        },
1501        {
1502                .procname       = "vfs_cache_pressure",
1503                .data           = &sysctl_vfs_cache_pressure,
1504                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1505                .mode           = 0644,
1506                .proc_handler   = proc_dointvec_minmax, /* plain proc_dointvec never checks extra1 */
1507                .extra1         = &zero,        /* lower bound checked by the handler */
1508        },
1509#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1510        {
1511                .procname       = "legacy_va_layout",
1512                .data           = &sysctl_legacy_va_layout,
1513                .maxlen         = sizeof(sysctl_legacy_va_layout),
1514                .mode           = 0644,
1515                .proc_handler   = proc_dointvec_minmax, /* plain proc_dointvec never checks extra1 */
1516                .extra1         = &zero,        /* lower bound checked by the handler */
1517        },
1518#endif
1519#ifdef CONFIG_NUMA
1520        {
1521                .procname       = "zone_reclaim_mode",
1522                .data           = &node_reclaim_mode,
1523                .maxlen         = sizeof(node_reclaim_mode),
1524                .mode           = 0644,
1525                .proc_handler   = proc_dointvec_minmax, /* plain proc_dointvec never checks extra1 */
1526                .extra1         = &zero,        /* lower bound checked by the handler */
1527        },
1528        {
1529                .procname       = "min_unmapped_ratio",
1530                .data           = &sysctl_min_unmapped_ratio,
1531                .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1532                .mode           = 0644,
1533                .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1534                .extra1         = &zero,
1535                .extra2         = &one_hundred,
1536        },
1537        {
1538                .procname       = "min_slab_ratio",
1539                .data           = &sysctl_min_slab_ratio,
1540                .maxlen         = sizeof(sysctl_min_slab_ratio),
1541                .mode           = 0644,
1542                .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1543                .extra1         = &zero,
1544                .extra2         = &one_hundred,
1545        },
1546#endif
1547#ifdef CONFIG_SMP
1548        {
1549                .procname       = "stat_interval",
1550                .data           = &sysctl_stat_interval,
1551                .maxlen         = sizeof(sysctl_stat_interval),
1552                .mode           = 0644,
1553                .proc_handler   = proc_dointvec_jiffies,
1554        },
1555        {
1556                .procname       = "stat_refresh",
1557                .data           = NULL,
1558                .maxlen         = 0,
1559                .mode           = 0600,
1560                .proc_handler   = vmstat_refresh,
1561        },
1562#endif
1563#ifdef CONFIG_MMU
1564        {
1565                .procname       = "mmap_min_addr",
1566                .data           = &dac_mmap_min_addr,
1567                .maxlen         = sizeof(unsigned long),
1568                .mode           = 0644,
1569                .proc_handler   = mmap_min_addr_handler,
1570        },
1571#endif
1572#ifdef CONFIG_NUMA
1573        {
1574                .procname       = "numa_zonelist_order",
1575                .data           = &numa_zonelist_order,
1576                .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1577                .mode           = 0644,
1578                .proc_handler   = numa_zonelist_order_handler,
1579        },
1580#endif
1581#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1582   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1583        {
1584                .procname       = "vdso_enabled",
1585#ifdef CONFIG_X86_32
1586                .data           = &vdso32_enabled,
1587                .maxlen         = sizeof(vdso32_enabled),
1588#else
1589                .data           = &vdso_enabled,
1590                .maxlen         = sizeof(vdso_enabled),
1591#endif
1592                .mode           = 0644,
1593                .proc_handler   = proc_dointvec,
1594                .extra1         = &zero,
1595        },
1596#endif
1597#ifdef CONFIG_HIGHMEM
1598        {
1599                .procname       = "highmem_is_dirtyable",
1600                .data           = &vm_highmem_is_dirtyable,
1601                .maxlen         = sizeof(vm_highmem_is_dirtyable),
1602                .mode           = 0644,
1603                .proc_handler   = proc_dointvec_minmax,
1604                .extra1         = &zero,
1605                .extra2         = &one,
1606        },
1607#endif
1608#ifdef CONFIG_MEMORY_FAILURE
1609        {
1610                .procname       = "memory_failure_early_kill",
1611                .data           = &sysctl_memory_failure_early_kill,
1612                .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1613                .mode           = 0644,
1614                .proc_handler   = proc_dointvec_minmax,
1615                .extra1         = &zero,
1616                .extra2         = &one,
1617        },
1618        {
1619                .procname       = "memory_failure_recovery",
1620                .data           = &sysctl_memory_failure_recovery,
1621                .maxlen         = sizeof(sysctl_memory_failure_recovery),
1622                .mode           = 0644,
1623                .proc_handler   = proc_dointvec_minmax,
1624                .extra1         = &zero,
1625                .extra2         = &one,
1626        },
1627#endif
1628        {
1629                .procname       = "user_reserve_kbytes",
1630                .data           = &sysctl_user_reserve_kbytes,
1631                .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1632                .mode           = 0644,
1633                .proc_handler   = proc_doulongvec_minmax,
1634        },
1635        {
1636                .procname       = "admin_reserve_kbytes",
1637                .data           = &sysctl_admin_reserve_kbytes,
1638                .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1639                .mode           = 0644,
1640                .proc_handler   = proc_doulongvec_minmax,
1641        },
1642#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1643        {
1644                .procname       = "mmap_rnd_bits",
1645                .data           = &mmap_rnd_bits,
1646                .maxlen         = sizeof(mmap_rnd_bits),
1647                .mode           = 0600,
1648                .proc_handler   = proc_dointvec_minmax,
1649                .extra1         = (void *)&mmap_rnd_bits_min,
1650                .extra2         = (void *)&mmap_rnd_bits_max,
1651        },
1652#endif
1653#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1654        {
1655                .procname       = "mmap_rnd_compat_bits",
1656                .data           = &mmap_rnd_compat_bits,
1657                .maxlen         = sizeof(mmap_rnd_compat_bits),
1658                .mode           = 0600,
1659                .proc_handler   = proc_dointvec_minmax,
1660                .extra1         = (void *)&mmap_rnd_compat_bits_min,
1661                .extra2         = (void *)&mmap_rnd_compat_bits_max,
1662        },
1663#endif
1664        { }
1665};
1666
1667static struct ctl_table fs_table[] = {
1668        {
1669                .procname       = "inode-nr",
1670                .data           = &inodes_stat,
1671                .maxlen         = 2*sizeof(long),
1672                .mode           = 0444,
1673                .proc_handler   = proc_nr_inodes,
1674        },
1675        {
1676                .procname       = "inode-state",
1677                .data           = &inodes_stat,
1678                .maxlen         = 7*sizeof(long),
1679                .mode           = 0444,
1680                .proc_handler   = proc_nr_inodes,
1681        },
1682        {
1683                .procname       = "file-nr",
1684                .data           = &files_stat,
1685                .maxlen         = sizeof(files_stat),
1686                .mode           = 0444,
1687                .proc_handler   = proc_nr_files,
1688        },
1689        {
1690                .procname       = "file-max",
1691                .data           = &files_stat.max_files,
1692                .maxlen         = sizeof(files_stat.max_files),
1693                .mode           = 0644,
1694                .proc_handler   = proc_doulongvec_minmax,
1695        },
1696        {
1697                .procname       = "nr_open",
1698                .data           = &sysctl_nr_open,
1699                .maxlen         = sizeof(unsigned int),
1700                .mode           = 0644,
1701                .proc_handler   = proc_dointvec_minmax,
1702                .extra1         = &sysctl_nr_open_min,
1703                .extra2         = &sysctl_nr_open_max,
1704        },
1705        {
1706                .procname       = "dentry-state",
1707                .data           = &dentry_stat,
1708                .maxlen         = 6*sizeof(long),
1709                .mode           = 0444,
1710                .proc_handler   = proc_nr_dentry,
1711        },
1712        {
1713                .procname       = "overflowuid",
1714                .data           = &fs_overflowuid,
1715                .maxlen         = sizeof(int),
1716                .mode           = 0644,
1717                .proc_handler   = proc_dointvec_minmax,
1718                .extra1         = &minolduid,
1719                .extra2         = &maxolduid,
1720        },
1721        {
1722                .procname       = "overflowgid",
1723                .data           = &fs_overflowgid,
1724                .maxlen         = sizeof(int),
1725                .mode           = 0644,
1726                .proc_handler   = proc_dointvec_minmax,
1727                .extra1         = &minolduid,
1728                .extra2         = &maxolduid,
1729        },
1730#ifdef CONFIG_FILE_LOCKING
1731        {
1732                .procname       = "leases-enable",
1733                .data           = &leases_enable,
1734                .maxlen         = sizeof(int),
1735                .mode           = 0644,
1736                .proc_handler   = proc_dointvec,
1737        },
1738#endif
1739#ifdef CONFIG_DNOTIFY
1740        {
1741                .procname       = "dir-notify-enable",
1742                .data           = &dir_notify_enable,
1743                .maxlen         = sizeof(int),
1744                .mode           = 0644,
1745                .proc_handler   = proc_dointvec,
1746        },
1747#endif
1748#ifdef CONFIG_MMU
1749#ifdef CONFIG_FILE_LOCKING
1750        {
1751                .procname       = "lease-break-time",
1752                .data           = &lease_break_time,
1753                .maxlen         = sizeof(int),
1754                .mode           = 0644,
1755                .proc_handler   = proc_dointvec,
1756        },
1757#endif
1758#ifdef CONFIG_AIO
1759        {
1760                .procname       = "aio-nr",
1761                .data           = &aio_nr,
1762                .maxlen         = sizeof(aio_nr),
1763                .mode           = 0444,
1764                .proc_handler   = proc_doulongvec_minmax,
1765        },
1766        {
1767                .procname       = "aio-max-nr",
1768                .data           = &aio_max_nr,
1769                .maxlen         = sizeof(aio_max_nr),
1770                .mode           = 0644,
1771                .proc_handler   = proc_doulongvec_minmax,
1772        },
1773#endif /* CONFIG_AIO */
1774#ifdef CONFIG_INOTIFY_USER
1775        {
1776                .procname       = "inotify",
1777                .mode           = 0555,
1778                .child          = inotify_table,
1779        },
1780#endif  
1781#ifdef CONFIG_EPOLL
1782        {
1783                .procname       = "epoll",
1784                .mode           = 0555,
1785                .child          = epoll_table,
1786        },
1787#endif
1788#endif
1789        {
1790                .procname       = "protected_symlinks",
1791                .data           = &sysctl_protected_symlinks,
1792                .maxlen         = sizeof(int),
1793                .mode           = 0600,
1794                .proc_handler   = proc_dointvec_minmax,
1795                .extra1         = &zero,
1796                .extra2         = &one,
1797        },
1798        {
1799                .procname       = "protected_hardlinks",
1800                .data           = &sysctl_protected_hardlinks,
1801                .maxlen         = sizeof(int),
1802                .mode           = 0600,
1803                .proc_handler   = proc_dointvec_minmax,
1804                .extra1         = &zero,
1805                .extra2         = &one,
1806        },
1807        {
1808                .procname       = "suid_dumpable",
1809                .data           = &suid_dumpable,
1810                .maxlen         = sizeof(int),
1811                .mode           = 0644,
1812                .proc_handler   = proc_dointvec_minmax_coredump,
1813                .extra1         = &zero,
1814                .extra2         = &two,
1815        },
1816#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1817        {
1818                .procname       = "binfmt_misc",
1819                .mode           = 0555,
1820                .child          = sysctl_mount_point,
1821        },
1822#endif
1823        {
1824                .procname       = "pipe-max-size",
1825                .data           = &pipe_max_size,
1826                .maxlen         = sizeof(pipe_max_size),
1827                .mode           = 0644,
1828                .proc_handler   = proc_dopipe_max_size,
1829        },
1830        {
1831                .procname       = "pipe-user-pages-hard",
1832                .data           = &pipe_user_pages_hard,
1833                .maxlen         = sizeof(pipe_user_pages_hard),
1834                .mode           = 0644,
1835                .proc_handler   = proc_doulongvec_minmax,
1836        },
1837        {
1838                .procname       = "pipe-user-pages-soft",
1839                .data           = &pipe_user_pages_soft,
1840                .maxlen         = sizeof(pipe_user_pages_soft),
1841                .mode           = 0644,
1842                .proc_handler   = proc_doulongvec_minmax,
1843        },
1844        {
1845                .procname       = "mount-max",
1846                .data           = &sysctl_mount_max,
1847                .maxlen         = sizeof(unsigned int),
1848                .mode           = 0644,
1849                .proc_handler   = proc_dointvec_minmax,
1850                .extra1         = &one,
1851        },
1852        { }
1853};
1854
1855static struct ctl_table debug_table[] = {
1856#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1857        {
1858                .procname       = "exception-trace",
1859                .data           = &show_unhandled_signals,
1860                .maxlen         = sizeof(int),
1861                .mode           = 0644,
1862                .proc_handler   = proc_dointvec
1863        },
1864#endif
1865#if defined(CONFIG_OPTPROBES)
1866        {
1867                .procname       = "kprobes-optimization",
1868                .data           = &sysctl_kprobes_optimization,
1869                .maxlen         = sizeof(int),
1870                .mode           = 0644,
1871                .proc_handler   = proc_kprobes_optimization_handler,
1872                .extra1         = &zero,
1873                .extra2         = &one,
1874        },
1875#endif
1876        { }
1877};
1878
1879static struct ctl_table dev_table[] = {
1880        { }
1881};
1882
1883int __init sysctl_init(void)
1884{
1885        struct ctl_table_header *hdr;
1886
1887        hdr = register_sysctl_table(sysctl_base_table);
1888        kmemleak_not_leak(hdr);
1889        return 0;
1890}
1891
1892#endif /* CONFIG_SYSCTL */
1893
1894/*
1895 * /proc/sys support
1896 */
1897
1898#ifdef CONFIG_PROC_SYSCTL
1899
1900static int _proc_do_string(char *data, int maxlen, int write,
1901                           char __user *buffer,
1902                           size_t *lenp, loff_t *ppos)
1903{
1904        size_t len;
1905        char __user *p;
1906        char c;
1907
1908        if (!data || !maxlen || !*lenp) {
1909                *lenp = 0;
1910                return 0;
1911        }
1912
1913        if (write) {
1914                if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1915                        /* Only continue writes not past the end of buffer. */
1916                        len = strlen(data);
1917                        if (len > maxlen - 1)
1918                                len = maxlen - 1;
1919
1920                        if (*ppos > len)
1921                                return 0;
1922                        len = *ppos;
1923                } else {
1924                        /* Start writing from beginning of buffer. */
1925                        len = 0;
1926                }
1927
1928                *ppos += *lenp;
1929                p = buffer;
1930                while ((p - buffer) < *lenp && len < maxlen - 1) {
1931                        if (get_user(c, p++))
1932                                return -EFAULT;
1933                        if (c == 0 || c == '\n')
1934                                break;
1935                        data[len++] = c;
1936                }
1937                data[len] = 0;
1938        } else {
1939                len = strlen(data);
1940                if (len > maxlen)
1941                        len = maxlen;
1942
1943                if (*ppos > len) {
1944                        *lenp = 0;
1945                        return 0;
1946                }
1947
1948                data += *ppos;
1949                len  -= *ppos;
1950
1951                if (len > *lenp)
1952                        len = *lenp;
1953                if (len)
1954                        if (copy_to_user(buffer, data, len))
1955                                return -EFAULT;
1956                if (len < *lenp) {
1957                        if (put_user('\n', buffer + len))
1958                                return -EFAULT;
1959                        len++;
1960                }
1961                *lenp = len;
1962                *ppos += len;
1963        }
1964        return 0;
1965}
1966
1967static void warn_sysctl_write(struct ctl_table *table)
1968{
1969        pr_warn_once("%s wrote to %s when file position was not 0!\n"
1970                "This will not be supported in the future. To silence this\n"
1971                "warning, set kernel.sysctl_writes_strict = -1\n",
1972                current->comm, table->procname);
1973}
1974
1975/**
1976 * proc_first_pos_non_zero_ignore - check if firs position is allowed
1977 * @ppos: file position
1978 * @table: the sysctl table
1979 *
1980 * Returns true if the first position is non-zero and the sysctl_writes_strict
1981 * mode indicates this is not allowed for numeric input types. String proc
1982 * hadlers can ignore the return value.
1983 */
1984static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
1985                                           struct ctl_table *table)
1986{
1987        if (!*ppos)
1988                return false;
1989
1990        switch (sysctl_writes_strict) {
1991        case SYSCTL_WRITES_STRICT:
1992                return true;
1993        case SYSCTL_WRITES_WARN:
1994                warn_sysctl_write(table);
1995                return false;
1996        default:
1997                return false;
1998        }
1999}
2000
2001/**
2002 * proc_dostring - read a string sysctl
2003 * @table: the sysctl table
2004 * @write: %TRUE if this is a write to the sysctl file
2005 * @buffer: the user buffer
2006 * @lenp: the size of the user buffer
2007 * @ppos: file position
2008 *
2009 * Reads/writes a string from/to the user buffer. If the kernel
2010 * buffer provided is not large enough to hold the string, the
2011 * string is truncated. The copied string is %NULL-terminated.
2012 * If the string is being read by the user process, it is copied
2013 * and a newline '\n' is added. It is truncated if the buffer is
2014 * not large enough.
2015 *
2016 * Returns 0 on success.
2017 */
2018int proc_dostring(struct ctl_table *table, int write,
2019                  void __user *buffer, size_t *lenp, loff_t *ppos)
2020{
2021        if (write)
2022                proc_first_pos_non_zero_ignore(ppos, table);
2023
2024        return _proc_do_string((char *)(table->data), table->maxlen, write,
2025                               (char __user *)buffer, lenp, ppos);
2026}
2027
/*
 * Advance *@buf past the characters skipped by skip_spaces() and return how
 * many bytes were skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
        char *start = *buf;

        *buf = skip_spaces(start);

        return *buf - start;
}
2036
/*
 * Skip a leading run of the character @v: advance *@buf and decrement *@size
 * for each match, stopping at the first other character or when *@size
 * reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        while (*size && **buf == v) {
                (*size)--;
                (*buf)++;
        }
}
2046
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * On success %0 is returned and @buf and @size are advanced past the bytes
 * that were consumed. If @tr is non-NULL and a character follows the number
 * (size is non-zero after returning from this function), @tr receives that
 * trailing character. Returns -EINVAL for empty input, input that does not
 * start with a digit, a number that fills the whole scratch buffer, or a
 * trailer that is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
                          unsigned long *val, bool *neg,
                          const char *perm_tr, unsigned perm_tr_len, char *tr)
{
        char scratch[TMPBUFLEN];
        char *cur = scratch;
        int len;

        if (!*size)
                return -EINVAL;

        /* Work on a bounded, NUL-terminated copy of the input. */
        len = *size;
        if (len > TMPBUFLEN - 1)
                len = TMPBUFLEN - 1;
        memcpy(scratch, *buf, len);
        scratch[len] = 0;

        /* A lone '-' is not treated as a sign. */
        *neg = (*cur == '-' && *size > 1);
        if (*neg)
                cur++;

        if (!isdigit(*cur))
                return -EINVAL;

        *val = simple_strtoul(cur, &cur, 0);
        len = cur - scratch;

        /* We don't know if the next char is whitespace thus we may accept
         * invalid integers (e.g. 1234...a) or two integers instead of one
         * (e.g. 123...1). So lets not allow such large numbers. */
        if (len == TMPBUFLEN - 1)
                return -EINVAL;

        if (len < *size) {
                /* Something follows the number: validate and report it. */
                if (perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
                        return -EINVAL;
                if (tr)
                        *tr = *cur;
        }

        *buf += len;
        *size -= len;

        return 0;
}
2111
2112/**
2113 * proc_put_long - converts an integer to a decimal ASCII formatted string
2114 *
2115 * @buf: the user buffer
2116 * @size: the size of the user buffer
2117 * @val: the integer to be converted
2118 * @neg: sign of the number, %TRUE for negative
2119 *
2120 * In case of success %0 is returned and @buf and @size are updated with
2121 * the amount of bytes written.
2122 */
2123static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2124                          bool neg)
2125{
2126        int len;
2127        char tmp[TMPBUFLEN], *p = tmp;
2128
2129        sprintf(p, "%s%lu", neg ? "-" : "", val);
2130        len = strlen(tmp);
2131        if (len > *size)
2132                len = *size;
2133        if (copy_to_user(*buf, tmp, len))
2134                return -EFAULT;
2135        *size -= len;
2136        *buf += len;
2137        return 0;
2138}
2139#undef TMPBUFLEN
2140
2141static int proc_put_char(void __user **buf, size_t *size, char c)
2142{
2143        if (*size) {
2144                char __user **buffer = (char __user **)buf;
2145                if (put_user(c, *buffer))
2146                        return -EFAULT;
2147                (*size)--, (*buffer)++;
2148                *buf = *buffer;
2149        }
2150        return 0;
2151}
2152
/*
 * Default converter between the parsed (sign, magnitude) form and an int.
 *
 * Write: store the value described by *@negp / *@lvalp into *@valp, or
 *        return -EINVAL if it does not fit into an int.
 * Read:  split *@valp into a sign (*@negp) and a magnitude (*@lvalp).
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (!write) {
                int val = *valp;

                *negp = val < 0;
                /* Negate in unsigned arithmetic so INT_MIN is handled. */
                *lvalp = *negp ? -(unsigned long)val : (unsigned long)val;
                return 0;
        }

        if (*negp) {
                /* A negative value may be one larger in magnitude. */
                if (*lvalp > (unsigned long) INT_MAX + 1)
                        return -EINVAL;
                *valp = -*lvalp;
                return 0;
        }

        if (*lvalp > (unsigned long) INT_MAX)
                return -EINVAL;
        *valp = *lvalp;

        return 0;
}
2179
/*
 * Default converter between an unsigned long and an unsigned int.
 *
 * Write: store *@lvalp into *@valp, or return -EINVAL if it exceeds
 *        UINT_MAX.
 * Read:  widen *@valp into *@lvalp.
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
                                  unsigned int *valp,
                                  int write, void *data)
{
        if (!write) {
                *lvalp = (unsigned long)*valp;
                return 0;
        }

        if (*lvalp > UINT_MAX)
                return -EINVAL;

        *valp = *lvalp;

        return 0;
}
2194
2195static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2196
2197static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2198                  int write, void __user *buffer,
2199                  size_t *lenp, loff_t *ppos,
2200                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2201                              int write, void *data),
2202                  void *data)
2203{
2204        int *i, vleft, first = 1, err = 0;
2205        size_t left;
2206        char *kbuf = NULL, *p;
2207        
2208        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2209                *lenp = 0;
2210                return 0;
2211        }
2212        
2213        i = (int *) tbl_data;
2214        vleft = table->maxlen / sizeof(*i);
2215        left = *lenp;
2216
2217        if (!conv)
2218                conv = do_proc_dointvec_conv;
2219
2220        if (write) {
2221                if (proc_first_pos_non_zero_ignore(ppos, table))
2222                        goto out;
2223
2224                if (left > PAGE_SIZE - 1)
2225                        left = PAGE_SIZE - 1;
2226                p = kbuf = memdup_user_nul(buffer, left);
2227                if (IS_ERR(kbuf))
2228                        return PTR_ERR(kbuf);
2229        }
2230
2231        for (; left && vleft--; i++, first=0) {
2232                unsigned long lval;
2233                bool neg;
2234
2235                if (write) {
2236                        left -= proc_skip_spaces(&p);
2237
2238                        if (!left)
2239                                break;
2240                        err = proc_get_long(&p, &left, &lval, &neg,
2241                                             proc_wspace_sep,
2242                                             sizeof(proc_wspace_sep), NULL);
2243                        if (err)
2244                                break;
2245                        if (conv(&neg, &lval, i, 1, data)) {
2246                                err = -EINVAL;
2247                                break;
2248                        }
2249                } else {
2250                        if (conv(&neg, &lval, i, 0, data)) {
2251                                err = -EINVAL;
2252                                break;
2253                        }
2254                        if (!first)
2255                                err = proc_put_char(&buffer, &left, '\t');
2256                        if (err)
2257                                break;
2258                        err = proc_put_long(&buffer, &left, lval, neg);
2259                        if (err)
2260                                break;
2261                }
2262        }
2263
2264        if (!write && !first && left && !err)
2265                err = proc_put_char(&buffer, &left, '\n');
2266        if (write && !err && left)
2267                left -= proc_skip_spaces(&p);
2268        if (write) {
2269                kfree(kbuf);
2270                if (first)
2271                        return err ? : -EINVAL;
2272        }
2273        *lenp -= left;
2274out:
2275        *ppos += *lenp;
2276        return err;
2277}
2278
2279static int do_proc_dointvec(struct ctl_table *table, int write,
2280                  void __user *buffer, size_t *lenp, loff_t *ppos,
2281                  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2282                              int write, void *data),
2283                  void *data)
2284{
2285        return __do_proc_dointvec(table->data, table, write,
2286                        buffer, lenp, ppos, conv, data);
2287}
2288
2289static int do_proc_douintvec_w(unsigned int *tbl_data,
2290                               struct ctl_table *table,
2291                               void __user *buffer,
2292                               size_t *lenp, loff_t *ppos,
2293                               int (*conv)(unsigned long *lvalp,
2294                                           unsigned int *valp,
2295                                           int write, void *data),
2296                               void *data)
2297{
2298        unsigned long lval;
2299        int err = 0;
2300        size_t left;
2301        bool neg;
2302        char *kbuf = NULL, *p;
2303
2304        left = *lenp;
2305
2306        if (proc_first_pos_non_zero_ignore(ppos, table))
2307                goto bail_early;
2308
2309        if (left > PAGE_SIZE - 1)
2310                left = PAGE_SIZE - 1;
2311
2312        p = kbuf = memdup_user_nul(buffer, left);
2313        if (IS_ERR(kbuf))
2314                return -EINVAL;
2315
2316        left -= proc_skip_spaces(&p);
2317        if (!left) {
2318                err = -EINVAL;
2319                goto out_free;
2320        }
2321
2322        err = proc_get_long(&p, &left, &lval, &neg,
2323                             proc_wspace_sep,
2324                             sizeof(proc_wspace_sep), NULL);
2325        if (err || neg) {
2326                err = -EINVAL;
2327                goto out_free;
2328        }
2329
2330        if (conv(&lval, tbl_data, 1, data)) {
2331                err = -EINVAL;
2332                goto out_free;
2333        }
2334
2335        if (!err && left)
2336                left -= proc_skip_spaces(&p);
2337
2338out_free:
2339        kfree(kbuf);
2340        if (err)
2341                return -EINVAL;
2342
2343        return 0;
2344
2345        /* This is in keeping with old __do_proc_dointvec() */
2346bail_early:
2347        *ppos += *lenp;
2348        return err;
2349}
2350
2351static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2352                               size_t *lenp, loff_t *ppos,
2353                               int (*conv)(unsigned long *lvalp,
2354                                           unsigned int *valp,
2355                                           int write, void *data),
2356                               void *data)
2357{
2358        unsigned long lval;
2359        int err = 0;
2360        size_t left;
2361
2362        left = *lenp;
2363
2364        if (conv(&lval, tbl_data, 0, data)) {
2365                err = -EINVAL;
2366                goto out;
2367        }
2368
2369        err = proc_put_long(&buffer, &left, lval, false);
2370        if (err || !left)
2371                goto out;
2372
2373        err = proc_put_char(&buffer, &left, '\n');
2374
2375out:
2376        *lenp -= left;
2377        *ppos += *lenp;
2378
2379        return err;
2380}
2381
2382static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2383                               int write, void __user *buffer,
2384                               size_t *lenp, loff_t *ppos,
2385                               int (*conv)(unsigned long *lvalp,
2386                                           unsigned int *valp,
2387                                           int write, void *data),
2388                               void *data)
2389{
2390        unsigned int *i, vleft;
2391
2392        if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2393                *lenp = 0;
2394                return 0;
2395        }
2396
2397        i = (unsigned int *) tbl_data;
2398        vleft = table->maxlen / sizeof(*i);
2399
2400        /*
2401         * Arrays are not supported, keep this simple. *Do not* add
2402         * support for them.
2403         */
2404        if (vleft != 1) {
2405                *lenp = 0;
2406                return -EINVAL;
2407        }
2408
2409        if (!conv)
2410                conv = do_proc_douintvec_conv;
2411
2412        if (write)
2413                return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2414                                           conv, data);
2415        return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2416}
2417
2418static int do_proc_douintvec(struct ctl_table *table, int write,
2419                             void __user *buffer, size_t *lenp, loff_t *ppos,
2420                             int (*conv)(unsigned long *lvalp,
2421                                         unsigned int *valp,
2422                                         int write, void *data),
2423                             void *data)
2424{
2425        return __do_proc_douintvec(table->data, table, write,
2426                                   buffer, lenp, ppos, conv, data);
2427}
2428
2429/**
2430 * proc_dointvec - read a vector of integers
2431 * @table: the sysctl table
2432 * @write: %TRUE if this is a write to the sysctl file
2433 * @buffer: the user buffer
2434 * @lenp: the size of the user buffer
2435 * @ppos: file position
2436 *
2437 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2438 * values from/to the user buffer, treated as an ASCII string. 
2439 *
2440 * Returns 0 on success.
2441 */
2442int proc_dointvec(struct ctl_table *table, int write,
2443                     void __user *buffer, size_t *lenp, loff_t *ppos)
2444{
2445        return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2446}
2447
2448/**
2449 * proc_douintvec - read a vector of unsigned integers
2450 * @table: the sysctl table
2451 * @write: %TRUE if this is a write to the sysctl file
2452 * @buffer: the user buffer
2453 * @lenp: the size of the user buffer
2454 * @ppos: file position
2455 *
2456 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2457 * values from/to the user buffer, treated as an ASCII string.
2458 *
2459 * Returns 0 on success.
2460 */
2461int proc_douintvec(struct ctl_table *table, int write,
2462                     void __user *buffer, size_t *lenp, loff_t *ppos)
2463{
2464        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2465                                 do_proc_douintvec_conv, NULL);
2466}
2467
2468/*
2469 * Taint values can only be increased
2470 * This means we can safely use a temporary.
2471 */
2472static int proc_taint(struct ctl_table *table, int write,
2473                               void __user *buffer, size_t *lenp, loff_t *ppos)
2474{
2475        struct ctl_table t;
2476        unsigned long tmptaint = get_taint();
2477        int err;
2478
2479        if (write && !capable(CAP_SYS_ADMIN))
2480                return -EPERM;
2481
2482        t = *table;
2483        t.data = &tmptaint;
2484        err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2485        if (err < 0)
2486                return err;
2487
2488        if (write) {
2489                /*
2490                 * Poor man's atomic or. Not worth adding a primitive
2491                 * to everyone's atomic.h for this
2492                 */
2493                int i;
2494                for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2495                        if ((tmptaint >> i) & 1)
2496                                add_taint(i, LOCKDEP_STILL_OK);
2497                }
2498        }
2499
2500        return err;
2501}
2502
2503#ifdef CONFIG_PRINTK
2504static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2505                                void __user *buffer, size_t *lenp, loff_t *ppos)
2506{
2507        if (write && !capable(CAP_SYS_ADMIN))
2508                return -EPERM;
2509
2510        return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2511}
2512#endif
2513
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.  A NULL
 * pointer disables the corresponding bound.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback used by proc_dointvec_minmax().
 *
 * @negp:  sign of the parsed/produced number (true means negative)
 * @lvalp: magnitude of the parsed/produced number
 * @valp:  the kernel-side int being written or read
 * @write: non-zero when converting user input into *valp
 * @data:  struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * On write the magnitude is first checked against the range an int can
 * hold, so an oversized number cannot wrap around into the permitted
 * window, and only then against the optional min/max bounds.  *valp is
 * left untouched whenever -EINVAL is returned.  On read *valp is split
 * into sign and magnitude.
 *
 * Returns 0 on success or -EINVAL when the written value is rejected.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;

	if (write) {
		int val;

		if (*negp) {
			/* INT_MIN has a magnitude of INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;

		if (val < 0) {
			*negp = true;
			/* Negate as unsigned long so INT_MIN is handled. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2551
2552/**
2553 * proc_dointvec_minmax - read a vector of integers with min/max values
2554 * @table: the sysctl table
2555 * @write: %TRUE if this is a write to the sysctl file
2556 * @buffer: the user buffer
2557 * @lenp: the size of the user buffer
2558 * @ppos: file position
2559 *
2560 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2561 * values from/to the user buffer, treated as an ASCII string.
2562 *
2563 * This routine will ensure the values are within the range specified by
2564 * table->extra1 (min) and table->extra2 (max).
2565 *
2566 * Returns 0 on success or -EINVAL on write when the range check fails.
2567 */
2568int proc_dointvec_minmax(struct ctl_table *table, int write,
2569                  void __user *buffer, size_t *lenp, loff_t *ppos)
2570{
2571        struct do_proc_dointvec_minmax_conv_param param = {
2572                .min = (int *) table->extra1,
2573                .max = (int *) table->extra2,
2574        };
2575        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2576                                do_proc_dointvec_minmax_conv, &param);
2577}
2578
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the optional lower and upper bounds used for range checking
 * by sysctl parameters that go through the proc_douintvec_minmax()
 * handler.  A NULL pointer disables the corresponding bound.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Conversion callback used by proc_douintvec_minmax().
 *
 * Reading copies *valp into *lvalp.  Writing rejects anything above
 * UINT_MAX with -EINVAL and anything outside the optional bounds in
 * @data with -ERANGE; only an accepted value is stored in *valp.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *param = data;
	unsigned int val;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	val = *lvalp;

	if (param->min && *param->min > val)
		return -ERANGE;
	if (param->max && *param->max < val)
		return -ERANGE;

	*valp = val;
	return 0;
}
2617
2618/**
2619 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2620 * @table: the sysctl table
2621 * @write: %TRUE if this is a write to the sysctl file
2622 * @buffer: the user buffer
2623 * @lenp: the size of the user buffer
2624 * @ppos: file position
2625 *
2626 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2627 * values from/to the user buffer, treated as an ASCII string. Negative
2628 * strings are not allowed.
2629 *
2630 * This routine will ensure the values are within the range specified by
2631 * table->extra1 (min) and table->extra2 (max). There is a final sanity
2632 * check for UINT_MAX to avoid having to support wrap around uses from
2633 * userspace.
2634 *
2635 * Returns 0 on success or -ERANGE on write when the range check fails.
2636 */
2637int proc_douintvec_minmax(struct ctl_table *table, int write,
2638                          void __user *buffer, size_t *lenp, loff_t *ppos)
2639{
2640        struct do_proc_douintvec_minmax_conv_param param = {
2641                .min = (unsigned int *) table->extra1,
2642                .max = (unsigned int *) table->extra2,
2643        };
2644        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2645                                 do_proc_douintvec_minmax_conv, &param);
2646}
2647
/*
 * Conversion callback for proc_dopipe_max_size().
 *
 * A written value is passed through round_pipe_size(); a result of 0 is
 * reported as -EINVAL, anything else is stored in *valp.  Reading copies
 * *valp into *lvalp unchanged.
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{
	unsigned int rounded;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	rounded = round_pipe_size(*lvalp);
	if (!rounded)
		return -EINVAL;

	*valp = rounded;
	return 0;
}
2667
2668static int proc_dopipe_max_size(struct ctl_table *table, int write,
2669                                void __user *buffer, size_t *lenp, loff_t *ppos)
2670{
2671        return do_proc_douintvec(table, write, buffer, lenp, ppos,
2672                                 do_proc_dopipe_max_size_conv, NULL);
2673}
2674
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern starts
 * with neither '/' nor '|'.  Does nothing without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2688
2689static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2690                void __user *buffer, size_t *lenp, loff_t *ppos)
2691{
2692        int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2693        if (!error)
2694                validate_coredump_safety();
2695        return error;
2696}
2697
2698#ifdef CONFIG_COREDUMP
2699static int proc_dostring_coredump(struct ctl_table *table, int write,
2700                  void __user *buffer, size_t *lenp, loff_t *ppos)
2701{
2702        int error = proc_dostring(table, write, buffer, lenp, ppos);
2703        if (!error)
2704                validate_coredump_safety();
2705        return error;
2706}
2707#endif
2708
2709static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2710                                     void __user *buffer,
2711                                     size_t *lenp, loff_t *ppos,
2712                                     unsigned long convmul,
2713                                     unsigned long convdiv)
2714{
2715        unsigned long *i, *min, *max;
2716        int vleft, first = 1, err = 0;
2717        size_t left;
2718        char *kbuf = NULL, *p;
2719
2720        if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2721                *lenp = 0;
2722                return 0;
2723        }
2724
2725        i = (unsigned long *) data;
2726        min = (unsigned long *) table->extra1;
2727        max = (unsigned long *) table->extra2;
2728        vleft = table->maxlen / sizeof(unsigned long);
2729        left = *lenp;
2730
2731        if (write) {
2732                if (proc_first_pos_non_zero_ignore(ppos, table))
2733                        goto out;
2734
2735                if (left > PAGE_SIZE - 1)
2736                        left = PAGE_SIZE - 1;
2737                p = kbuf = memdup_user_nul(buffer, left);
2738                if (IS_ERR(kbuf))
2739                        return PTR_ERR(kbuf);
2740        }
2741
2742        for (; left && vleft--; i++, first = 0) {
2743                unsigned long val;
2744
2745                if (write) {
2746                        bool neg;
2747
2748                        left -= proc_skip_spaces(&p);
2749
2750                        err = proc_get_long(&p, &left, &val, &neg,
2751                                             proc_wspace_sep,
2752                                             sizeof(proc_wspace_sep), NULL);
2753                        if (err)
2754                                break;
2755                        if (neg)
2756                                continue;
2757                        val = convmul * val / convdiv;
2758                        if ((min && val < *min) || (max && val > *max))
2759                                continue;
2760                        *i = val;
2761                } else {
2762                        val = convdiv * (*i) / convmul;
2763                        if (!first) {
2764                                err = proc_put_char(&buffer, &left, '\t');
2765                                if (err)
2766                                        break;
2767                        }
2768                        err = proc_put_long(&buffer, &left, val, false);
2769                        if (err)
2770                                break;
2771                }
2772        }
2773
2774        if (!write && !first && left && !err)
2775                err = proc_put_char(&buffer, &left, '\n');
2776        if (write && !err)
2777                left -= proc_skip_spaces(&p);
2778        if (write) {
2779                kfree(kbuf);
2780                if (first)
2781                        return err ? : -EINVAL;
2782        }
2783        *lenp -= left;
2784out:
2785        *ppos += *lenp;
2786        return err;
2787}
2788
2789static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2790                                     void __user *buffer,
2791                                     size_t *lenp, loff_t *ppos,
2792                                     unsigned long convmul,
2793                                     unsigned long convdiv)
2794{
2795        return __do_proc_doulongvec_minmax(table->data, table, write,
2796                        buffer, lenp, ppos, convmul, convdiv);
2797}
2798
2799/**
2800 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2801 * @table: the sysctl table
2802 * @write: %TRUE if this is a write to the sysctl file
2803 * @buffer: the user buffer
2804 * @lenp: the size of the user buffer
2805 * @ppos: file position
2806 *
2807 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2808 * values from/to the user buffer, treated as an ASCII string.
2809 *
2810 * This routine will ensure the values are within the range specified by
2811 * table->extra1 (min) and table->extra2 (max).
2812 *
2813 * Returns 0 on success.
2814 */
2815int proc_doulongvec_minmax(struct ctl_table *table, int write,
2816                           void __user *buffer, size_t *lenp, loff_t *ppos)
2817{
2818    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2819}
2820
2821/**
2822 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2823 * @table: the sysctl table
2824 * @write: %TRUE if this is a write to the sysctl file
2825 * @buffer: the user buffer
2826 * @lenp: the size of the user buffer
2827 * @ppos: file position
2828 *
2829 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2830 * values from/to the user buffer, treated as an ASCII string. The values
2831 * are treated as milliseconds, and converted to jiffies when they are stored.
2832 *
2833 * This routine will ensure the values are within the range specified by
2834 * table->extra1 (min) and table->extra2 (max).
2835 *
2836 * Returns 0 on success.
2837 */
2838int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2839                                      void __user *buffer,
2840                                      size_t *lenp, loff_t *ppos)
2841{
2842    return do_proc_doulongvec_minmax(table, write, buffer,
2843                                     lenp, ppos, HZ, 1000l);
2844}
2845
2846
2847static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2848                                         int *valp,
2849                                         int write, void *data)
2850{
2851        if (write) {
2852                if (*lvalp > INT_MAX / HZ)
2853                        return 1;
2854                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2855        } else {
2856                int val = *valp;
2857                unsigned long lval;
2858                if (val < 0) {
2859                        *negp = true;
2860                        lval = -(unsigned long)val;
2861                } else {
2862                        *negp = false;
2863                        lval = (unsigned long)val;
2864                }
2865                *lvalp = lval / HZ;
2866        }
2867        return 0;
2868}
2869
2870static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2871                                                int *valp,
2872                                                int write, void *data)
2873{
2874        if (write) {
2875                if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2876                        return 1;
2877                *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2878        } else {
2879                int val = *valp;
2880                unsigned long lval;
2881                if (val < 0) {
2882                        *negp = true;
2883                        lval = -(unsigned long)val;
2884                } else {
2885                        *negp = false;
2886                        lval = (unsigned long)val;
2887                }
2888                *lvalp = jiffies_to_clock_t(lval);
2889        }
2890        return 0;
2891}
2892
/*
 * Conversion callback for proc_dointvec_ms_jiffies(): written values go
 * through msecs_to_jiffies(), read values through jiffies_to_msecs().
 *
 * Returns 1 when the converted written value exceeds INT_MAX, else 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int val;

	if (write) {
		unsigned long jif;

		jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		if (jif > INT_MAX)
			return 1;

		*valp = (int)jif;
		return 0;
	}

	/* Split the stored value into sign and magnitude before converting. */
	val = *valp;
	*negp = val < 0;
	magnitude = *negp ? -(unsigned long)val : (unsigned long)val;
	*lvalp = jiffies_to_msecs(magnitude);

	return 0;
}
2917
2918/**
2919 * proc_dointvec_jiffies - read a vector of integers as seconds
2920 * @table: the sysctl table
2921 * @write: %TRUE if this is a write to the sysctl file
2922 * @buffer: the user buffer
2923 * @lenp: the size of the user buffer
2924 * @ppos: file position
2925 *
2926 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2927 * values from/to the user buffer, treated as an ASCII string. 
2928 * The values read are assumed to be in seconds, and are converted into
2929 * jiffies.
2930 *
2931 * Returns 0 on success.
2932 */
2933int proc_dointvec_jiffies(struct ctl_table *table, int write,
2934                          void __user *buffer, size_t *lenp, loff_t *ppos)
2935{
2936    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2937                            do_proc_dointvec_jiffies_conv,NULL);
2938}
2939
2940/**
2941 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2942 * @table: the sysctl table
2943 * @write: %TRUE if this is a write to the sysctl file
2944 * @buffer: the user buffer
2945 * @lenp: the size of the user buffer
2946 * @ppos: pointer to the file position
2947 *
2948 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2949 * values from/to the user buffer, treated as an ASCII string. 
2950 * The values read are assumed to be in 1/USER_HZ seconds, and 
2951 * are converted into jiffies.
2952 *
2953 * Returns 0 on success.
2954 */
2955int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2956                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2957{
2958    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2959                            do_proc_dointvec_userhz_jiffies_conv,NULL);
2960}
2961
2962/**
2963 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2964 * @table: the sysctl table
2965 * @write: %TRUE if this is a write to the sysctl file
2966 * @buffer: the user buffer
2967 * @lenp: the size of the user buffer
2968 * @ppos: file position
2969 * @ppos: the current position in the file
2970 *
2971 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2972 * values from/to the user buffer, treated as an ASCII string. 
2973 * The values read are assumed to be in 1/1000 seconds, and 
2974 * are converted into jiffies.
2975 *
2976 * Returns 0 on success.
2977 */
2978int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2979                             void __user *buffer, size_t *lenp, loff_t *ppos)
2980{
2981        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2982                                do_proc_dointvec_ms_jiffies_conv, NULL);
2983}
2984
2985static int proc_do_cad_pid(struct ctl_table *table, int write,
2986                           void __user *buffer, size_t *lenp, loff_t *ppos)
2987{
2988        struct pid *new_pid;
2989        pid_t tmp;
2990        int r;
2991
2992        tmp = pid_vnr(cad_pid);
2993
2994        r = __do_proc_dointvec(&tmp, table, write, buffer,
2995                               lenp, ppos, NULL, NULL);
2996        if (r || !write)
2997                return r;
2998
2999        new_pid = find_get_pid(tmp);
3000        if (!new_pid)
3001                return -ESRCH;
3002
3003        put_pid(xchg(&cad_pid, new_pid));
3004        return 0;
3005}
3006
3007/**
3008 * proc_do_large_bitmap - read/write from/to a large bitmap
3009 * @table: the sysctl table
3010 * @write: %TRUE if this is a write to the sysctl file
3011 * @buffer: the user buffer
3012 * @lenp: the size of the user buffer
3013 * @ppos: file position
3014 *
3015 * The bitmap is stored at table->data and the bitmap length (in bits)
3016 * in table->maxlen.
3017 *
3018 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3019 * large bitmaps may be represented in a compact manner. Writing into
3020 * the file will clear the bitmap then update it with the given input.
3021 *
3022 * Returns 0 on success.
3023 */
3024int proc_do_large_bitmap(struct ctl_table *table, int write,
3025                         void __user *buffer, size_t *lenp, loff_t *ppos)
3026{
3027        int err = 0;
3028        bool first = 1;
3029        size_t left = *lenp;
3030        unsigned long bitmap_len = table->maxlen;
3031        unsigned long *bitmap = *(unsigned long **) table->data;
3032        unsigned long *tmp_bitmap = NULL;
3033        char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3034
3035        if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3036                *lenp = 0;
3037                return 0;
3038        }
3039
3040        if (write) {
3041                char *kbuf, *p;
3042
3043                if (left > PAGE_SIZE - 1)
3044                        left = PAGE_SIZE - 1;
3045
3046                p = kbuf = memdup_user_nul(buffer, left);
3047                if (IS_ERR(kbuf))
3048                        return PTR_ERR(kbuf);
3049
3050                tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
3051                                     GFP_KERNEL);
3052                if (!tmp_bitmap) {
3053                        kfree(kbuf);
3054                        return -ENOMEM;
3055                }
3056                proc_skip_char(&p, &left, '\n');
3057                while (!err && left) {
3058                        unsigned long val_a, val_b;
3059                        bool neg;
3060
3061                        err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3062                                             sizeof(tr_a), &c);
3063                        if (err)
3064                                break;
3065                        if (val_a >= bitmap_len || neg) {
3066                                err = -EINVAL;
3067                                break;
3068                        }
3069
3070                        val_b = val_a;
3071                        if (left) {
3072                                p++;
3073                                left--;
3074                        }
3075
3076                        if (c == '-') {
3077                                err = proc_get_long(&p, &left, &val_b,
3078                                                     &neg, tr_b, sizeof(tr_b),
3079                                                     &c);
3080                                if (err)
3081                                        break;
3082                                if (val_b >= bitmap_len || neg ||
3083                                    val_a > val_b) {
3084                                        err = -EINVAL;
3085                                        break;
3086                                }
3087                                if (left) {
3088                                        p++;
3089                                        left--;
3090                                }
3091                        }
3092
3093                        bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3094                        first = 0;
3095                        proc_skip_char(&p, &left, '\n');
3096                }
3097                kfree(kbuf);
3098        } else {
3099                unsigned long bit_a, bit_b = 0;
3100
3101                while (left) {
3102                        bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3103                        if (bit_a >= bitmap_len)
3104                                break;
3105                        bit_b = find_next_zero_bit(bitmap, bitmap_len,
3106                                                   bit_a + 1) - 1;
3107
3108                        if (!first) {
3109                                err = proc_put_char(&buffer, &left, ',');
3110                                if (err)
3111                                        break;
3112                        }
3113                        err = proc_put_long(&buffer, &left, bit_a, false);
3114                        if (err)
3115                                break;
3116                        if (bit_a != bit_b) {
3117                                err = proc_put_char(&buffer, &left, '-');
3118                                if (err)
3119                                        break;
3120                                err = proc_put_long(&buffer, &left, bit_b, false);
3121                                if (err)
3122                                        break;
3123                        }
3124
3125                        first = 0; bit_b++;
3126                }
3127                if (!err)
3128                        err = proc_put_char(&buffer, &left, '\n');
3129        }
3130
3131        if (!err) {
3132                if (write) {
3133                        if (*ppos)
3134                                bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3135                        else
3136                                bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3137                }
3138                *lenp -= left;
3139                *ppos += *lenp;
3140        }
3141
3142        kfree(tmp_bitmap);
3143        return err;
3144}
3145
3146#else /* CONFIG_PROC_SYSCTL */
3147
3148int proc_dostring(struct ctl_table *table, int write,
3149                  void __user *buffer, size_t *lenp, loff_t *ppos)
3150{
3151        return -ENOSYS;
3152}
3153
3154int proc_dointvec(struct ctl_table *table, int write,
3155                  void __user *buffer, size_t *lenp, loff_t *ppos)
3156{
3157        return -ENOSYS;
3158}
3159
3160int proc_douintvec(struct ctl_table *table, int write,
3161                  void __user *buffer, size_t *lenp, loff_t *ppos)
3162{
3163        return -ENOSYS;
3164}
3165
3166int proc_dointvec_minmax(struct ctl_table *table, int write,
3167                    void __user *buffer, size_t *lenp, loff_t *ppos)
3168{
3169        return -ENOSYS;
3170}
3171
3172int proc_douintvec_minmax(struct ctl_table *table, int write,
3173                          void __user *buffer, size_t *lenp, loff_t *ppos)
3174{
3175        return -ENOSYS;
3176}
3177
3178int proc_dointvec_jiffies(struct ctl_table *table, int write,
3179                    void __user *buffer, size_t *lenp, loff_t *ppos)
3180{
3181        return -ENOSYS;
3182}
3183
3184int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3185                    void __user *buffer, size_t *lenp, loff_t *ppos)
3186{
3187        return -ENOSYS;
3188}
3189
3190int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3191                             void __user *buffer, size_t *lenp, loff_t *ppos)
3192{
3193        return -ENOSYS;
3194}
3195
3196int proc_doulongvec_minmax(struct ctl_table *table, int write,
3197                    void __user *buffer, size_t *lenp, loff_t *ppos)
3198{
3199        return -ENOSYS;
3200}
3201
3202int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3203                                      void __user *buffer,
3204                                      size_t *lenp, loff_t *ppos)
3205{
3206    return -ENOSYS;
3207}
3208
3209
3210#endif /* CONFIG_PROC_SYSCTL */
3211
/*
 * Each handler below is defined twice (once per CONFIG_PROC_SYSCTL
 * branch), so the exports are collected here in one place rather than
 * after every definition - exception granted :-)
 */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
3226