linux/kernel/sysctl.c
<<
>>
Prefs
   1/*
   2 * sysctl.c: General linux system control interface
   3 *
   4 * Begun 24 March 1995, Stephen Tweedie
   5 * Added /proc support, Dec 1995
   6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
   7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
   8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
   9 * Dynamic registration fixes, Stephen Tweedie.
  10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
  11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
  12 *  Horn.
  13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
  14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
  15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
  16 *  Wendling.
  17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
  18 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/mm.h>
  23#include <linux/swap.h>
  24#include <linux/slab.h>
  25#include <linux/sysctl.h>
  26#include <linux/proc_fs.h>
  27#include <linux/security.h>
  28#include <linux/ctype.h>
  29#include <linux/kmemcheck.h>
  30#include <linux/smp_lock.h>
  31#include <linux/fs.h>
  32#include <linux/init.h>
  33#include <linux/kernel.h>
  34#include <linux/kobject.h>
  35#include <linux/net.h>
  36#include <linux/sysrq.h>
  37#include <linux/highuid.h>
  38#include <linux/writeback.h>
  39#include <linux/hugetlb.h>
  40#include <linux/initrd.h>
  41#include <linux/key.h>
  42#include <linux/times.h>
  43#include <linux/limits.h>
  44#include <linux/dcache.h>
  45#include <linux/syscalls.h>
  46#include <linux/vmstat.h>
  47#include <linux/nfs_fs.h>
  48#include <linux/acpi.h>
  49#include <linux/reboot.h>
  50#include <linux/ftrace.h>
  51#include <linux/slow-work.h>
  52#include <linux/perf_event.h>
  53
  54#include <asm/uaccess.h>
  55#include <asm/processor.h>
  56
  57#ifdef CONFIG_X86
  58#include <asm/nmi.h>
  59#include <asm/stacktrace.h>
  60#include <asm/io.h>
  61#endif
  62
  63static int deprecated_sysctl_warning(struct __sysctl_args *args);
  64
  65#if defined(CONFIG_SYSCTL)
  66
  67/* External variables not in a header file. */
  68extern int C_A_D;
  69extern int print_fatal_signals;
  70extern int sysctl_overcommit_memory;
  71extern int sysctl_overcommit_ratio;
  72extern int sysctl_panic_on_oom;
  73extern int sysctl_oom_kill_allocating_task;
  74extern int sysctl_oom_dump_tasks;
  75extern int max_threads;
  76extern int core_uses_pid;
  77extern int suid_dumpable;
  78extern char core_pattern[];
  79extern unsigned int core_pipe_limit;
  80extern int pid_max;
  81extern int min_free_kbytes;
  82extern int pid_max_min, pid_max_max;
  83extern int sysctl_drop_caches;
  84extern int percpu_pagelist_fraction;
  85extern int compat_log;
  86extern int latencytop_enabled;
  87extern int sysctl_nr_open_min, sysctl_nr_open_max;
  88#ifndef CONFIG_MMU
  89extern int sysctl_nr_trim_pages;
  90#endif
  91#ifdef CONFIG_RCU_TORTURE_TEST
  92extern int rcutorture_runnable;
  93#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
  94#ifdef CONFIG_BLOCK
  95extern int blk_iopoll_enabled;
  96#endif
  97
  98/* Constants used for minimum and  maximum */
  99#ifdef CONFIG_DETECT_SOFTLOCKUP
 100static int sixty = 60;
 101static int neg_one = -1;
 102#endif
 103
 104static int zero;
 105static int __maybe_unused one = 1;
 106static int __maybe_unused two = 2;
 107static unsigned long one_ul = 1;
 108static int one_hundred = 100;
 109#ifdef CONFIG_PRINTK
 110static int ten_thousand = 10000;
 111#endif
 112
 113/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 114static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 115
 116/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 117static int maxolduid = 65535;
 118static int minolduid;
 119static int min_percpu_pagelist_fract = 8;
 120
 121static int ngroups_max = NGROUPS_MAX;
 122
 123#ifdef CONFIG_MODULES
 124extern char modprobe_path[];
 125extern int modules_disabled;
 126#endif
 127#ifdef CONFIG_CHR_DEV_SG
 128extern int sg_big_buff;
 129#endif
 130
 131#ifdef CONFIG_SPARC
 132#include <asm/system.h>
 133#endif
 134
 135#ifdef CONFIG_SPARC64
 136extern int sysctl_tsb_ratio;
 137#endif
 138
 139#ifdef __hppa__
 140extern int pwrsw_enabled;
 141extern int unaligned_enabled;
 142#endif
 143
 144#ifdef CONFIG_S390
 145#ifdef CONFIG_MATHEMU
 146extern int sysctl_ieee_emulation_warnings;
 147#endif
 148extern int sysctl_userprocess_debug;
 149extern int spin_retry;
 150#endif
 151
 152#ifdef CONFIG_BSD_PROCESS_ACCT
 153extern int acct_parm[];
 154#endif
 155
 156#ifdef CONFIG_IA64
 157extern int no_unaligned_warning;
 158extern int unaligned_dump_stack;
 159#endif
 160
 161#ifdef CONFIG_RT_MUTEXES
 162extern int max_lock_depth;
 163#endif
 164
 165#ifdef CONFIG_PROC_SYSCTL
 166static int proc_do_cad_pid(struct ctl_table *table, int write,
 167                  void __user *buffer, size_t *lenp, loff_t *ppos);
 168static int proc_taint(struct ctl_table *table, int write,
 169                               void __user *buffer, size_t *lenp, loff_t *ppos);
 170#endif
 171
 172static struct ctl_table root_table[];
 173static struct ctl_table_root sysctl_table_root;
 174static struct ctl_table_header root_table_header = {
 175        .count = 1,
 176        .ctl_table = root_table,
 177        .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
 178        .root = &sysctl_table_root,
 179        .set = &sysctl_table_root.default_set,
 180};
 181static struct ctl_table_root sysctl_table_root = {
 182        .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
 183        .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
 184};
 185
 186static struct ctl_table kern_table[];
 187static struct ctl_table vm_table[];
 188static struct ctl_table fs_table[];
 189static struct ctl_table debug_table[];
 190static struct ctl_table dev_table[];
 191extern struct ctl_table random_table[];
 192#ifdef CONFIG_INOTIFY_USER
 193extern struct ctl_table inotify_table[];
 194#endif
 195#ifdef CONFIG_EPOLL
 196extern struct ctl_table epoll_table[];
 197#endif
 198
 199#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
 200int sysctl_legacy_va_layout;
 201#endif
 202
 203extern int prove_locking;
 204extern int lock_stat;
 205
 206/* The default sysctl tables: */
 207
 208static struct ctl_table root_table[] = {
 209        {
 210                .ctl_name       = CTL_KERN,
 211                .procname       = "kernel",
 212                .mode           = 0555,
 213                .child          = kern_table,
 214        },
 215        {
 216                .ctl_name       = CTL_VM,
 217                .procname       = "vm",
 218                .mode           = 0555,
 219                .child          = vm_table,
 220        },
 221        {
 222                .ctl_name       = CTL_FS,
 223                .procname       = "fs",
 224                .mode           = 0555,
 225                .child          = fs_table,
 226        },
 227        {
 228                .ctl_name       = CTL_DEBUG,
 229                .procname       = "debug",
 230                .mode           = 0555,
 231                .child          = debug_table,
 232        },
 233        {
 234                .ctl_name       = CTL_DEV,
 235                .procname       = "dev",
 236                .mode           = 0555,
 237                .child          = dev_table,
 238        },
 239/*
 240 * NOTE: do not add new entries to this table unless you have read
 241 * Documentation/sysctl/ctl_unnumbered.txt
 242 */
 243        { .ctl_name = 0 }
 244};
 245
 246#ifdef CONFIG_SCHED_DEBUG
 247static int min_sched_granularity_ns = 100000;           /* 100 usecs */
 248static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
 249static int min_wakeup_granularity_ns;                   /* 0 usecs */
 250static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
 251#endif
 252
 253static struct ctl_table kern_table[] = {
 254        {
 255                .ctl_name       = CTL_UNNUMBERED,
 256                .procname       = "sched_child_runs_first",
 257                .data           = &sysctl_sched_child_runs_first,
 258                .maxlen         = sizeof(unsigned int),
 259                .mode           = 0644,
 260                .proc_handler   = &proc_dointvec,
 261        },
 262#ifdef CONFIG_SCHED_DEBUG
 263        {
 264                .ctl_name       = CTL_UNNUMBERED,
 265                .procname       = "sched_min_granularity_ns",
 266                .data           = &sysctl_sched_min_granularity,
 267                .maxlen         = sizeof(unsigned int),
 268                .mode           = 0644,
 269                .proc_handler   = &sched_nr_latency_handler,
 270                .strategy       = &sysctl_intvec,
 271                .extra1         = &min_sched_granularity_ns,
 272                .extra2         = &max_sched_granularity_ns,
 273        },
 274        {
 275                .ctl_name       = CTL_UNNUMBERED,
 276                .procname       = "sched_latency_ns",
 277                .data           = &sysctl_sched_latency,
 278                .maxlen         = sizeof(unsigned int),
 279                .mode           = 0644,
 280                .proc_handler   = &sched_nr_latency_handler,
 281                .strategy       = &sysctl_intvec,
 282                .extra1         = &min_sched_granularity_ns,
 283                .extra2         = &max_sched_granularity_ns,
 284        },
 285        {
 286                .ctl_name       = CTL_UNNUMBERED,
 287                .procname       = "sched_wakeup_granularity_ns",
 288                .data           = &sysctl_sched_wakeup_granularity,
 289                .maxlen         = sizeof(unsigned int),
 290                .mode           = 0644,
 291                .proc_handler   = &proc_dointvec_minmax,
 292                .strategy       = &sysctl_intvec,
 293                .extra1         = &min_wakeup_granularity_ns,
 294                .extra2         = &max_wakeup_granularity_ns,
 295        },
 296        {
 297                .ctl_name       = CTL_UNNUMBERED,
 298                .procname       = "sched_shares_ratelimit",
 299                .data           = &sysctl_sched_shares_ratelimit,
 300                .maxlen         = sizeof(unsigned int),
 301                .mode           = 0644,
 302                .proc_handler   = &proc_dointvec,
 303        },
 304        {
 305                .ctl_name       = CTL_UNNUMBERED,
 306                .procname       = "sched_shares_thresh",
 307                .data           = &sysctl_sched_shares_thresh,
 308                .maxlen         = sizeof(unsigned int),
 309                .mode           = 0644,
 310                .proc_handler   = &proc_dointvec_minmax,
 311                .strategy       = &sysctl_intvec,
 312                .extra1         = &zero,
 313        },
 314        {
 315                .ctl_name       = CTL_UNNUMBERED,
 316                .procname       = "sched_features",
 317                .data           = &sysctl_sched_features,
 318                .maxlen         = sizeof(unsigned int),
 319                .mode           = 0644,
 320                .proc_handler   = &proc_dointvec,
 321        },
 322        {
 323                .ctl_name       = CTL_UNNUMBERED,
 324                .procname       = "sched_migration_cost",
 325                .data           = &sysctl_sched_migration_cost,
 326                .maxlen         = sizeof(unsigned int),
 327                .mode           = 0644,
 328                .proc_handler   = &proc_dointvec,
 329        },
 330        {
 331                .ctl_name       = CTL_UNNUMBERED,
 332                .procname       = "sched_nr_migrate",
 333                .data           = &sysctl_sched_nr_migrate,
 334                .maxlen         = sizeof(unsigned int),
 335                .mode           = 0644,
 336                .proc_handler   = &proc_dointvec,
 337        },
 338        {
 339                .ctl_name       = CTL_UNNUMBERED,
 340                .procname       = "sched_time_avg",
 341                .data           = &sysctl_sched_time_avg,
 342                .maxlen         = sizeof(unsigned int),
 343                .mode           = 0644,
 344                .proc_handler   = &proc_dointvec,
 345        },
 346        {
 347                .ctl_name       = CTL_UNNUMBERED,
 348                .procname       = "timer_migration",
 349                .data           = &sysctl_timer_migration,
 350                .maxlen         = sizeof(unsigned int),
 351                .mode           = 0644,
 352                .proc_handler   = &proc_dointvec_minmax,
 353                .strategy       = &sysctl_intvec,
 354                .extra1         = &zero,
 355                .extra2         = &one,
 356        },
 357#endif
 358        {
 359                .ctl_name       = CTL_UNNUMBERED,
 360                .procname       = "sched_rt_period_us",
 361                .data           = &sysctl_sched_rt_period,
 362                .maxlen         = sizeof(unsigned int),
 363                .mode           = 0644,
 364                .proc_handler   = &sched_rt_handler,
 365        },
 366        {
 367                .ctl_name       = CTL_UNNUMBERED,
 368                .procname       = "sched_rt_runtime_us",
 369                .data           = &sysctl_sched_rt_runtime,
 370                .maxlen         = sizeof(int),
 371                .mode           = 0644,
 372                .proc_handler   = &sched_rt_handler,
 373        },
 374        {
 375                .ctl_name       = CTL_UNNUMBERED,
 376                .procname       = "sched_compat_yield",
 377                .data           = &sysctl_sched_compat_yield,
 378                .maxlen         = sizeof(unsigned int),
 379                .mode           = 0644,
 380                .proc_handler   = &proc_dointvec,
 381        },
 382#ifdef CONFIG_PROVE_LOCKING
 383        {
 384                .ctl_name       = CTL_UNNUMBERED,
 385                .procname       = "prove_locking",
 386                .data           = &prove_locking,
 387                .maxlen         = sizeof(int),
 388                .mode           = 0644,
 389                .proc_handler   = &proc_dointvec,
 390        },
 391#endif
 392#ifdef CONFIG_LOCK_STAT
 393        {
 394                .ctl_name       = CTL_UNNUMBERED,
 395                .procname       = "lock_stat",
 396                .data           = &lock_stat,
 397                .maxlen         = sizeof(int),
 398                .mode           = 0644,
 399                .proc_handler   = &proc_dointvec,
 400        },
 401#endif
 402        {
 403                .ctl_name       = KERN_PANIC,
 404                .procname       = "panic",
 405                .data           = &panic_timeout,
 406                .maxlen         = sizeof(int),
 407                .mode           = 0644,
 408                .proc_handler   = &proc_dointvec,
 409        },
 410        {
 411                .ctl_name       = KERN_CORE_USES_PID,
 412                .procname       = "core_uses_pid",
 413                .data           = &core_uses_pid,
 414                .maxlen         = sizeof(int),
 415                .mode           = 0644,
 416                .proc_handler   = &proc_dointvec,
 417        },
 418        {
 419                .ctl_name       = KERN_CORE_PATTERN,
 420                .procname       = "core_pattern",
 421                .data           = core_pattern,
 422                .maxlen         = CORENAME_MAX_SIZE,
 423                .mode           = 0644,
 424                .proc_handler   = &proc_dostring,
 425                .strategy       = &sysctl_string,
 426        },
 427        {
 428                .ctl_name       = CTL_UNNUMBERED,
 429                .procname       = "core_pipe_limit",
 430                .data           = &core_pipe_limit,
 431                .maxlen         = sizeof(unsigned int),
 432                .mode           = 0644,
 433                .proc_handler   = &proc_dointvec,
 434        },
 435#ifdef CONFIG_PROC_SYSCTL
 436        {
 437                .procname       = "tainted",
 438                .maxlen         = sizeof(long),
 439                .mode           = 0644,
 440                .proc_handler   = &proc_taint,
 441        },
 442#endif
 443#ifdef CONFIG_LATENCYTOP
 444        {
 445                .procname       = "latencytop",
 446                .data           = &latencytop_enabled,
 447                .maxlen         = sizeof(int),
 448                .mode           = 0644,
 449                .proc_handler   = &proc_dointvec,
 450        },
 451#endif
 452#ifdef CONFIG_BLK_DEV_INITRD
 453        {
 454                .ctl_name       = KERN_REALROOTDEV,
 455                .procname       = "real-root-dev",
 456                .data           = &real_root_dev,
 457                .maxlen         = sizeof(int),
 458                .mode           = 0644,
 459                .proc_handler   = &proc_dointvec,
 460        },
 461#endif
 462        {
 463                .ctl_name       = CTL_UNNUMBERED,
 464                .procname       = "print-fatal-signals",
 465                .data           = &print_fatal_signals,
 466                .maxlen         = sizeof(int),
 467                .mode           = 0644,
 468                .proc_handler   = &proc_dointvec,
 469        },
 470#ifdef CONFIG_SPARC
 471        {
 472                .ctl_name       = KERN_SPARC_REBOOT,
 473                .procname       = "reboot-cmd",
 474                .data           = reboot_command,
 475                .maxlen         = 256,
 476                .mode           = 0644,
 477                .proc_handler   = &proc_dostring,
 478                .strategy       = &sysctl_string,
 479        },
 480        {
 481                .ctl_name       = KERN_SPARC_STOP_A,
 482                .procname       = "stop-a",
 483                .data           = &stop_a_enabled,
 484                .maxlen         = sizeof (int),
 485                .mode           = 0644,
 486                .proc_handler   = &proc_dointvec,
 487        },
 488        {
 489                .ctl_name       = KERN_SPARC_SCONS_PWROFF,
 490                .procname       = "scons-poweroff",
 491                .data           = &scons_pwroff,
 492                .maxlen         = sizeof (int),
 493                .mode           = 0644,
 494                .proc_handler   = &proc_dointvec,
 495        },
 496#endif
 497#ifdef CONFIG_SPARC64
 498        {
 499                .ctl_name       = CTL_UNNUMBERED,
 500                .procname       = "tsb-ratio",
 501                .data           = &sysctl_tsb_ratio,
 502                .maxlen         = sizeof (int),
 503                .mode           = 0644,
 504                .proc_handler   = &proc_dointvec,
 505        },
 506#endif
 507#ifdef __hppa__
 508        {
 509                .ctl_name       = KERN_HPPA_PWRSW,
 510                .procname       = "soft-power",
 511                .data           = &pwrsw_enabled,
 512                .maxlen         = sizeof (int),
 513                .mode           = 0644,
 514                .proc_handler   = &proc_dointvec,
 515        },
 516        {
 517                .ctl_name       = KERN_HPPA_UNALIGNED,
 518                .procname       = "unaligned-trap",
 519                .data           = &unaligned_enabled,
 520                .maxlen         = sizeof (int),
 521                .mode           = 0644,
 522                .proc_handler   = &proc_dointvec,
 523        },
 524#endif
 525        {
 526                .ctl_name       = KERN_CTLALTDEL,
 527                .procname       = "ctrl-alt-del",
 528                .data           = &C_A_D,
 529                .maxlen         = sizeof(int),
 530                .mode           = 0644,
 531                .proc_handler   = &proc_dointvec,
 532        },
 533#ifdef CONFIG_FUNCTION_TRACER
 534        {
 535                .ctl_name       = CTL_UNNUMBERED,
 536                .procname       = "ftrace_enabled",
 537                .data           = &ftrace_enabled,
 538                .maxlen         = sizeof(int),
 539                .mode           = 0644,
 540                .proc_handler   = &ftrace_enable_sysctl,
 541        },
 542#endif
 543#ifdef CONFIG_STACK_TRACER
 544        {
 545                .ctl_name       = CTL_UNNUMBERED,
 546                .procname       = "stack_tracer_enabled",
 547                .data           = &stack_tracer_enabled,
 548                .maxlen         = sizeof(int),
 549                .mode           = 0644,
 550                .proc_handler   = &stack_trace_sysctl,
 551        },
 552#endif
 553#ifdef CONFIG_TRACING
 554        {
 555                .ctl_name       = CTL_UNNUMBERED,
 556                .procname       = "ftrace_dump_on_oops",
 557                .data           = &ftrace_dump_on_oops,
 558                .maxlen         = sizeof(int),
 559                .mode           = 0644,
 560                .proc_handler   = &proc_dointvec,
 561        },
 562#endif
 563#ifdef CONFIG_MODULES
 564        {
 565                .ctl_name       = KERN_MODPROBE,
 566                .procname       = "modprobe",
 567                .data           = &modprobe_path,
 568                .maxlen         = KMOD_PATH_LEN,
 569                .mode           = 0644,
 570                .proc_handler   = &proc_dostring,
 571                .strategy       = &sysctl_string,
 572        },
 573        {
 574                .ctl_name       = CTL_UNNUMBERED,
 575                .procname       = "modules_disabled",
 576                .data           = &modules_disabled,
 577                .maxlen         = sizeof(int),
 578                .mode           = 0644,
 579                /* only handle a transition from default "0" to "1" */
 580                .proc_handler   = &proc_dointvec_minmax,
 581                .extra1         = &one,
 582                .extra2         = &one,
 583        },
 584#endif
 585#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 586        {
 587                .ctl_name       = KERN_HOTPLUG,
 588                .procname       = "hotplug",
 589                .data           = &uevent_helper,
 590                .maxlen         = UEVENT_HELPER_PATH_LEN,
 591                .mode           = 0644,
 592                .proc_handler   = &proc_dostring,
 593                .strategy       = &sysctl_string,
 594        },
 595#endif
 596#ifdef CONFIG_CHR_DEV_SG
 597        {
 598                .ctl_name       = KERN_SG_BIG_BUFF,
 599                .procname       = "sg-big-buff",
 600                .data           = &sg_big_buff,
 601                .maxlen         = sizeof (int),
 602                .mode           = 0444,
 603                .proc_handler   = &proc_dointvec,
 604        },
 605#endif
 606#ifdef CONFIG_BSD_PROCESS_ACCT
 607        {
 608                .ctl_name       = KERN_ACCT,
 609                .procname       = "acct",
 610                .data           = &acct_parm,
 611                .maxlen         = 3*sizeof(int),
 612                .mode           = 0644,
 613                .proc_handler   = &proc_dointvec,
 614        },
 615#endif
 616#ifdef CONFIG_MAGIC_SYSRQ
 617        {
 618                .ctl_name       = KERN_SYSRQ,
 619                .procname       = "sysrq",
 620                .data           = &__sysrq_enabled,
 621                .maxlen         = sizeof (int),
 622                .mode           = 0644,
 623                .proc_handler   = &proc_dointvec,
 624        },
 625#endif
 626#ifdef CONFIG_PROC_SYSCTL
 627        {
 628                .procname       = "cad_pid",
 629                .data           = NULL,
 630                .maxlen         = sizeof (int),
 631                .mode           = 0600,
 632                .proc_handler   = &proc_do_cad_pid,
 633        },
 634#endif
 635        {
 636                .ctl_name       = KERN_MAX_THREADS,
 637                .procname       = "threads-max",
 638                .data           = &max_threads,
 639                .maxlen         = sizeof(int),
 640                .mode           = 0644,
 641                .proc_handler   = &proc_dointvec,
 642        },
 643        {
 644                .ctl_name       = KERN_RANDOM,
 645                .procname       = "random",
 646                .mode           = 0555,
 647                .child          = random_table,
 648        },
 649        {
 650                .ctl_name       = KERN_OVERFLOWUID,
 651                .procname       = "overflowuid",
 652                .data           = &overflowuid,
 653                .maxlen         = sizeof(int),
 654                .mode           = 0644,
 655                .proc_handler   = &proc_dointvec_minmax,
 656                .strategy       = &sysctl_intvec,
 657                .extra1         = &minolduid,
 658                .extra2         = &maxolduid,
 659        },
 660        {
 661                .ctl_name       = KERN_OVERFLOWGID,
 662                .procname       = "overflowgid",
 663                .data           = &overflowgid,
 664                .maxlen         = sizeof(int),
 665                .mode           = 0644,
 666                .proc_handler   = &proc_dointvec_minmax,
 667                .strategy       = &sysctl_intvec,
 668                .extra1         = &minolduid,
 669                .extra2         = &maxolduid,
 670        },
 671#ifdef CONFIG_S390
 672#ifdef CONFIG_MATHEMU
 673        {
 674                .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
 675                .procname       = "ieee_emulation_warnings",
 676                .data           = &sysctl_ieee_emulation_warnings,
 677                .maxlen         = sizeof(int),
 678                .mode           = 0644,
 679                .proc_handler   = &proc_dointvec,
 680        },
 681#endif
 682        {
 683                .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
 684                .procname       = "userprocess_debug",
 685                .data           = &sysctl_userprocess_debug,
 686                .maxlen         = sizeof(int),
 687                .mode           = 0644,
 688                .proc_handler   = &proc_dointvec,
 689        },
 690#endif
 691        {
 692                .ctl_name       = KERN_PIDMAX,
 693                .procname       = "pid_max",
 694                .data           = &pid_max,
 695                .maxlen         = sizeof (int),
 696                .mode           = 0644,
 697                .proc_handler   = &proc_dointvec_minmax,
 698                .strategy       = sysctl_intvec,
 699                .extra1         = &pid_max_min,
 700                .extra2         = &pid_max_max,
 701        },
 702        {
 703                .ctl_name       = KERN_PANIC_ON_OOPS,
 704                .procname       = "panic_on_oops",
 705                .data           = &panic_on_oops,
 706                .maxlen         = sizeof(int),
 707                .mode           = 0644,
 708                .proc_handler   = &proc_dointvec,
 709        },
 710#if defined CONFIG_PRINTK
 711        {
 712                .ctl_name       = KERN_PRINTK,
 713                .procname       = "printk",
 714                .data           = &console_loglevel,
 715                .maxlen         = 4*sizeof(int),
 716                .mode           = 0644,
 717                .proc_handler   = &proc_dointvec,
 718        },
 719        {
 720                .ctl_name       = KERN_PRINTK_RATELIMIT,
 721                .procname       = "printk_ratelimit",
 722                .data           = &printk_ratelimit_state.interval,
 723                .maxlen         = sizeof(int),
 724                .mode           = 0644,
 725                .proc_handler   = &proc_dointvec_jiffies,
 726                .strategy       = &sysctl_jiffies,
 727        },
 728        {
 729                .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
 730                .procname       = "printk_ratelimit_burst",
 731                .data           = &printk_ratelimit_state.burst,
 732                .maxlen         = sizeof(int),
 733                .mode           = 0644,
 734                .proc_handler   = &proc_dointvec,
 735        },
 736        {
 737                .ctl_name       = CTL_UNNUMBERED,
 738                .procname       = "printk_delay",
 739                .data           = &printk_delay_msec,
 740                .maxlen         = sizeof(int),
 741                .mode           = 0644,
 742                .proc_handler   = &proc_dointvec_minmax,
 743                .strategy       = &sysctl_intvec,
 744                .extra1         = &zero,
 745                .extra2         = &ten_thousand,
 746        },
 747#endif
 748        {
 749                .ctl_name       = KERN_NGROUPS_MAX,
 750                .procname       = "ngroups_max",
 751                .data           = &ngroups_max,
 752                .maxlen         = sizeof (int),
 753                .mode           = 0444,
 754                .proc_handler   = &proc_dointvec,
 755        },
 756#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 757        {
 758                .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
 759                .procname       = "unknown_nmi_panic",
 760                .data           = &unknown_nmi_panic,
 761                .maxlen         = sizeof (int),
 762                .mode           = 0644,
 763                .proc_handler   = &proc_dointvec,
 764        },
 765        {
 766                .procname       = "nmi_watchdog",
 767                .data           = &nmi_watchdog_enabled,
 768                .maxlen         = sizeof (int),
 769                .mode           = 0644,
 770                .proc_handler   = &proc_nmi_enabled,
 771        },
 772#endif
 773#if defined(CONFIG_X86)
 774        {
 775                .ctl_name       = KERN_PANIC_ON_NMI,
 776                .procname       = "panic_on_unrecovered_nmi",
 777                .data           = &panic_on_unrecovered_nmi,
 778                .maxlen         = sizeof(int),
 779                .mode           = 0644,
 780                .proc_handler   = &proc_dointvec,
 781        },
 782        {
 783                .ctl_name       = CTL_UNNUMBERED,
 784                .procname       = "panic_on_io_nmi",
 785                .data           = &panic_on_io_nmi,
 786                .maxlen         = sizeof(int),
 787                .mode           = 0644,
 788                .proc_handler   = &proc_dointvec,
 789        },
 790        {
 791                .ctl_name       = KERN_BOOTLOADER_TYPE,
 792                .procname       = "bootloader_type",
 793                .data           = &bootloader_type,
 794                .maxlen         = sizeof (int),
 795                .mode           = 0444,
 796                .proc_handler   = &proc_dointvec,
 797        },
 798        {
 799                .ctl_name       = CTL_UNNUMBERED,
 800                .procname       = "bootloader_version",
 801                .data           = &bootloader_version,
 802                .maxlen         = sizeof (int),
 803                .mode           = 0444,
 804                .proc_handler   = &proc_dointvec,
 805        },
 806        {
 807                .ctl_name       = CTL_UNNUMBERED,
 808                .procname       = "kstack_depth_to_print",
 809                .data           = &kstack_depth_to_print,
 810                .maxlen         = sizeof(int),
 811                .mode           = 0644,
 812                .proc_handler   = &proc_dointvec,
 813        },
 814        {
 815                .ctl_name       = CTL_UNNUMBERED,
 816                .procname       = "io_delay_type",
 817                .data           = &io_delay_type,
 818                .maxlen         = sizeof(int),
 819                .mode           = 0644,
 820                .proc_handler   = &proc_dointvec,
 821        },
 822#endif
 823#if defined(CONFIG_MMU)
 824        {
 825                .ctl_name       = KERN_RANDOMIZE,
 826                .procname       = "randomize_va_space",
 827                .data           = &randomize_va_space,
 828                .maxlen         = sizeof(int),
 829                .mode           = 0644,
 830                .proc_handler   = &proc_dointvec,
 831        },
 832#endif
 833#if defined(CONFIG_S390) && defined(CONFIG_SMP)
 834        {
 835                .ctl_name       = KERN_SPIN_RETRY,
 836                .procname       = "spin_retry",
 837                .data           = &spin_retry,
 838                .maxlen         = sizeof (int),
 839                .mode           = 0644,
 840                .proc_handler   = &proc_dointvec,
 841        },
 842#endif
 843#if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
 844        {
 845                .procname       = "acpi_video_flags",
 846                .data           = &acpi_realmode_flags,
 847                .maxlen         = sizeof (unsigned long),
 848                .mode           = 0644,
 849                .proc_handler   = &proc_doulongvec_minmax,
 850        },
 851#endif
 852#ifdef CONFIG_IA64
 853        {
 854                .ctl_name       = KERN_IA64_UNALIGNED,
 855                .procname       = "ignore-unaligned-usertrap",
 856                .data           = &no_unaligned_warning,
 857                .maxlen         = sizeof (int),
 858                .mode           = 0644,
 859                .proc_handler   = &proc_dointvec,
 860        },
 861        {
 862                .ctl_name       = CTL_UNNUMBERED,
 863                .procname       = "unaligned-dump-stack",
 864                .data           = &unaligned_dump_stack,
 865                .maxlen         = sizeof (int),
 866                .mode           = 0644,
 867                .proc_handler   = &proc_dointvec,
 868        },
 869#endif
 870#ifdef CONFIG_DETECT_SOFTLOCKUP
 871        {
 872                .ctl_name       = CTL_UNNUMBERED,
 873                .procname       = "softlockup_panic",
 874                .data           = &softlockup_panic,
 875                .maxlen         = sizeof(int),
 876                .mode           = 0644,
 877                .proc_handler   = &proc_dointvec_minmax,
 878                .strategy       = &sysctl_intvec,
 879                .extra1         = &zero,
 880                .extra2         = &one,
 881        },
 882        {
 883                .ctl_name       = CTL_UNNUMBERED,
 884                .procname       = "softlockup_thresh",
 885                .data           = &softlockup_thresh,
 886                .maxlen         = sizeof(int),
 887                .mode           = 0644,
 888                .proc_handler   = &proc_dosoftlockup_thresh,
 889                .strategy       = &sysctl_intvec,
 890                .extra1         = &neg_one,
 891                .extra2         = &sixty,
 892        },
 893#endif
 894#ifdef CONFIG_DETECT_HUNG_TASK
 895        {
 896                .ctl_name       = CTL_UNNUMBERED,
 897                .procname       = "hung_task_panic",
 898                .data           = &sysctl_hung_task_panic,
 899                .maxlen         = sizeof(int),
 900                .mode           = 0644,
 901                .proc_handler   = &proc_dointvec_minmax,
 902                .strategy       = &sysctl_intvec,
 903                .extra1         = &zero,
 904                .extra2         = &one,
 905        },
 906        {
 907                .ctl_name       = CTL_UNNUMBERED,
 908                .procname       = "hung_task_check_count",
 909                .data           = &sysctl_hung_task_check_count,
 910                .maxlen         = sizeof(unsigned long),
 911                .mode           = 0644,
 912                .proc_handler   = &proc_doulongvec_minmax,
 913                .strategy       = &sysctl_intvec,
 914        },
 915        {
 916                .ctl_name       = CTL_UNNUMBERED,
 917                .procname       = "hung_task_timeout_secs",
 918                .data           = &sysctl_hung_task_timeout_secs,
 919                .maxlen         = sizeof(unsigned long),
 920                .mode           = 0644,
 921                .proc_handler   = &proc_dohung_task_timeout_secs,
 922                .strategy       = &sysctl_intvec,
 923        },
 924        {
 925                .ctl_name       = CTL_UNNUMBERED,
 926                .procname       = "hung_task_warnings",
 927                .data           = &sysctl_hung_task_warnings,
 928                .maxlen         = sizeof(unsigned long),
 929                .mode           = 0644,
 930                .proc_handler   = &proc_doulongvec_minmax,
 931                .strategy       = &sysctl_intvec,
 932        },
 933#endif
 934#ifdef CONFIG_COMPAT
 935        {
 936                .ctl_name       = KERN_COMPAT_LOG,
 937                .procname       = "compat-log",
 938                .data           = &compat_log,
 939                .maxlen         = sizeof (int),
 940                .mode           = 0644,
 941                .proc_handler   = &proc_dointvec,
 942        },
 943#endif
 944#ifdef CONFIG_RT_MUTEXES
 945        {
 946                .ctl_name       = KERN_MAX_LOCK_DEPTH,
 947                .procname       = "max_lock_depth",
 948                .data           = &max_lock_depth,
 949                .maxlen         = sizeof(int),
 950                .mode           = 0644,
 951                .proc_handler   = &proc_dointvec,
 952        },
 953#endif
 954        {
 955                .ctl_name       = CTL_UNNUMBERED,
 956                .procname       = "poweroff_cmd",
 957                .data           = &poweroff_cmd,
 958                .maxlen         = POWEROFF_CMD_PATH_LEN,
 959                .mode           = 0644,
 960                .proc_handler   = &proc_dostring,
 961                .strategy       = &sysctl_string,
 962        },
 963#ifdef CONFIG_KEYS
 964        {
 965                .ctl_name       = CTL_UNNUMBERED,
 966                .procname       = "keys",
 967                .mode           = 0555,
 968                .child          = key_sysctls,
 969        },
 970#endif
 971#ifdef CONFIG_RCU_TORTURE_TEST
 972        {
 973                .ctl_name       = CTL_UNNUMBERED,
 974                .procname       = "rcutorture_runnable",
 975                .data           = &rcutorture_runnable,
 976                .maxlen         = sizeof(int),
 977                .mode           = 0644,
 978                .proc_handler   = &proc_dointvec,
 979        },
 980#endif
 981#ifdef CONFIG_SLOW_WORK
 982        {
 983                .ctl_name       = CTL_UNNUMBERED,
 984                .procname       = "slow-work",
 985                .mode           = 0555,
 986                .child          = slow_work_sysctls,
 987        },
 988#endif
 989#ifdef CONFIG_PERF_EVENTS
 990        {
 991                .ctl_name       = CTL_UNNUMBERED,
 992                .procname       = "perf_event_paranoid",
 993                .data           = &sysctl_perf_event_paranoid,
 994                .maxlen         = sizeof(sysctl_perf_event_paranoid),
 995                .mode           = 0644,
 996                .proc_handler   = &proc_dointvec,
 997        },
 998        {
 999                .ctl_name       = CTL_UNNUMBERED,
1000                .procname       = "perf_event_mlock_kb",
1001                .data           = &sysctl_perf_event_mlock,
1002                .maxlen         = sizeof(sysctl_perf_event_mlock),
1003                .mode           = 0644,
1004                .proc_handler   = &proc_dointvec,
1005        },
1006        {
1007                .ctl_name       = CTL_UNNUMBERED,
1008                .procname       = "perf_event_max_sample_rate",
1009                .data           = &sysctl_perf_event_sample_rate,
1010                .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1011                .mode           = 0644,
1012                .proc_handler   = &proc_dointvec,
1013        },
1014#endif
1015#ifdef CONFIG_KMEMCHECK
1016        {
1017                .ctl_name       = CTL_UNNUMBERED,
1018                .procname       = "kmemcheck",
1019                .data           = &kmemcheck_enabled,
1020                .maxlen         = sizeof(int),
1021                .mode           = 0644,
1022                .proc_handler   = &proc_dointvec,
1023        },
1024#endif
1025#ifdef CONFIG_BLOCK
1026        {
1027                .ctl_name       = CTL_UNNUMBERED,
1028                .procname       = "blk_iopoll",
1029                .data           = &blk_iopoll_enabled,
1030                .maxlen         = sizeof(int),
1031                .mode           = 0644,
1032                .proc_handler   = &proc_dointvec,
1033        },
1034#endif
1035/*
1036 * NOTE: do not add new entries to this table unless you have read
1037 * Documentation/sysctl/ctl_unnumbered.txt
1038 */
1039        { .ctl_name = 0 }
1040};
1041
1042static struct ctl_table vm_table[] = {
1043        {
1044                .ctl_name       = VM_OVERCOMMIT_MEMORY,
1045                .procname       = "overcommit_memory",
1046                .data           = &sysctl_overcommit_memory,
1047                .maxlen         = sizeof(sysctl_overcommit_memory),
1048                .mode           = 0644,
1049                .proc_handler   = &proc_dointvec,
1050        },
1051        {
1052                .ctl_name       = VM_PANIC_ON_OOM,
1053                .procname       = "panic_on_oom",
1054                .data           = &sysctl_panic_on_oom,
1055                .maxlen         = sizeof(sysctl_panic_on_oom),
1056                .mode           = 0644,
1057                .proc_handler   = &proc_dointvec,
1058        },
1059        {
1060                .ctl_name       = CTL_UNNUMBERED,
1061                .procname       = "oom_kill_allocating_task",
1062                .data           = &sysctl_oom_kill_allocating_task,
1063                .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1064                .mode           = 0644,
1065                .proc_handler   = &proc_dointvec,
1066        },
1067        {
1068                .ctl_name       = CTL_UNNUMBERED,
1069                .procname       = "oom_dump_tasks",
1070                .data           = &sysctl_oom_dump_tasks,
1071                .maxlen         = sizeof(sysctl_oom_dump_tasks),
1072                .mode           = 0644,
1073                .proc_handler   = &proc_dointvec,
1074        },
1075        {
1076                .ctl_name       = VM_OVERCOMMIT_RATIO,
1077                .procname       = "overcommit_ratio",
1078                .data           = &sysctl_overcommit_ratio,
1079                .maxlen         = sizeof(sysctl_overcommit_ratio),
1080                .mode           = 0644,
1081                .proc_handler   = &proc_dointvec,
1082        },
1083        {
1084                .ctl_name       = VM_PAGE_CLUSTER,
1085                .procname       = "page-cluster", 
1086                .data           = &page_cluster,
1087                .maxlen         = sizeof(int),
1088                .mode           = 0644,
1089                .proc_handler   = &proc_dointvec,
1090        },
1091        {
1092                .ctl_name       = VM_DIRTY_BACKGROUND,
1093                .procname       = "dirty_background_ratio",
1094                .data           = &dirty_background_ratio,
1095                .maxlen         = sizeof(dirty_background_ratio),
1096                .mode           = 0644,
1097                .proc_handler   = &dirty_background_ratio_handler,
1098                .strategy       = &sysctl_intvec,
1099                .extra1         = &zero,
1100                .extra2         = &one_hundred,
1101        },
1102        {
1103                .ctl_name       = CTL_UNNUMBERED,
1104                .procname       = "dirty_background_bytes",
1105                .data           = &dirty_background_bytes,
1106                .maxlen         = sizeof(dirty_background_bytes),
1107                .mode           = 0644,
1108                .proc_handler   = &dirty_background_bytes_handler,
1109                .strategy       = &sysctl_intvec,
1110                .extra1         = &one_ul,
1111        },
1112        {
1113                .ctl_name       = VM_DIRTY_RATIO,
1114                .procname       = "dirty_ratio",
1115                .data           = &vm_dirty_ratio,
1116                .maxlen         = sizeof(vm_dirty_ratio),
1117                .mode           = 0644,
1118                .proc_handler   = &dirty_ratio_handler,
1119                .strategy       = &sysctl_intvec,
1120                .extra1         = &zero,
1121                .extra2         = &one_hundred,
1122        },
1123        {
1124                .ctl_name       = CTL_UNNUMBERED,
1125                .procname       = "dirty_bytes",
1126                .data           = &vm_dirty_bytes,
1127                .maxlen         = sizeof(vm_dirty_bytes),
1128                .mode           = 0644,
1129                .proc_handler   = &dirty_bytes_handler,
1130                .strategy       = &sysctl_intvec,
1131                .extra1         = &dirty_bytes_min,
1132        },
1133        {
1134                .procname       = "dirty_writeback_centisecs",
1135                .data           = &dirty_writeback_interval,
1136                .maxlen         = sizeof(dirty_writeback_interval),
1137                .mode           = 0644,
1138                .proc_handler   = &dirty_writeback_centisecs_handler,
1139        },
1140        {
1141                .procname       = "dirty_expire_centisecs",
1142                .data           = &dirty_expire_interval,
1143                .maxlen         = sizeof(dirty_expire_interval),
1144                .mode           = 0644,
1145                .proc_handler   = &proc_dointvec,
1146        },
1147        {
1148                .ctl_name       = VM_NR_PDFLUSH_THREADS,
1149                .procname       = "nr_pdflush_threads",
1150                .data           = &nr_pdflush_threads,
1151                .maxlen         = sizeof nr_pdflush_threads,
1152                .mode           = 0444 /* read-only*/,
1153                .proc_handler   = &proc_dointvec,
1154        },
1155        {
1156                .ctl_name       = VM_SWAPPINESS,
1157                .procname       = "swappiness",
1158                .data           = &vm_swappiness,
1159                .maxlen         = sizeof(vm_swappiness),
1160                .mode           = 0644,
1161                .proc_handler   = &proc_dointvec_minmax,
1162                .strategy       = &sysctl_intvec,
1163                .extra1         = &zero,
1164                .extra2         = &one_hundred,
1165        },
1166#ifdef CONFIG_HUGETLB_PAGE
1167         {
1168                .procname       = "nr_hugepages",
1169                .data           = NULL,
1170                .maxlen         = sizeof(unsigned long),
1171                .mode           = 0644,
1172                .proc_handler   = &hugetlb_sysctl_handler,
1173                .extra1         = (void *)&hugetlb_zero,
1174                .extra2         = (void *)&hugetlb_infinity,
1175         },
1176         {
1177                .ctl_name       = VM_HUGETLB_GROUP,
1178                .procname       = "hugetlb_shm_group",
1179                .data           = &sysctl_hugetlb_shm_group,
1180                .maxlen         = sizeof(gid_t),
1181                .mode           = 0644,
1182                .proc_handler   = &proc_dointvec,
1183         },
1184         {
1185                .ctl_name       = CTL_UNNUMBERED,
1186                .procname       = "hugepages_treat_as_movable",
1187                .data           = &hugepages_treat_as_movable,
1188                .maxlen         = sizeof(int),
1189                .mode           = 0644,
1190                .proc_handler   = &hugetlb_treat_movable_handler,
1191        },
1192        {
1193                .ctl_name       = CTL_UNNUMBERED,
1194                .procname       = "nr_overcommit_hugepages",
1195                .data           = NULL,
1196                .maxlen         = sizeof(unsigned long),
1197                .mode           = 0644,
1198                .proc_handler   = &hugetlb_overcommit_handler,
1199                .extra1         = (void *)&hugetlb_zero,
1200                .extra2         = (void *)&hugetlb_infinity,
1201        },
1202#endif
1203        {
1204                .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
1205                .procname       = "lowmem_reserve_ratio",
1206                .data           = &sysctl_lowmem_reserve_ratio,
1207                .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1208                .mode           = 0644,
1209                .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
1210                .strategy       = &sysctl_intvec,
1211        },
1212        {
1213                .ctl_name       = VM_DROP_PAGECACHE,
1214                .procname       = "drop_caches",
1215                .data           = &sysctl_drop_caches,
1216                .maxlen         = sizeof(int),
1217                .mode           = 0644,
1218                .proc_handler   = drop_caches_sysctl_handler,
1219                .strategy       = &sysctl_intvec,
1220        },
1221        {
1222                .ctl_name       = VM_MIN_FREE_KBYTES,
1223                .procname       = "min_free_kbytes",
1224                .data           = &min_free_kbytes,
1225                .maxlen         = sizeof(min_free_kbytes),
1226                .mode           = 0644,
1227                .proc_handler   = &min_free_kbytes_sysctl_handler,
1228                .strategy       = &sysctl_intvec,
1229                .extra1         = &zero,
1230        },
1231        {
1232                .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
1233                .procname       = "percpu_pagelist_fraction",
1234                .data           = &percpu_pagelist_fraction,
1235                .maxlen         = sizeof(percpu_pagelist_fraction),
1236                .mode           = 0644,
1237                .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
1238                .strategy       = &sysctl_intvec,
1239                .extra1         = &min_percpu_pagelist_fract,
1240        },
1241#ifdef CONFIG_MMU
1242        {
1243                .ctl_name       = VM_MAX_MAP_COUNT,
1244                .procname       = "max_map_count",
1245                .data           = &sysctl_max_map_count,
1246                .maxlen         = sizeof(sysctl_max_map_count),
1247                .mode           = 0644,
1248                .proc_handler   = &proc_dointvec
1249        },
1250#else
1251        {
1252                .ctl_name       = CTL_UNNUMBERED,
1253                .procname       = "nr_trim_pages",
1254                .data           = &sysctl_nr_trim_pages,
1255                .maxlen         = sizeof(sysctl_nr_trim_pages),
1256                .mode           = 0644,
1257                .proc_handler   = &proc_dointvec_minmax,
1258                .strategy       = &sysctl_intvec,
1259                .extra1         = &zero,
1260        },
1261#endif
1262        {
1263                .ctl_name       = VM_LAPTOP_MODE,
1264                .procname       = "laptop_mode",
1265                .data           = &laptop_mode,
1266                .maxlen         = sizeof(laptop_mode),
1267                .mode           = 0644,
1268                .proc_handler   = &proc_dointvec_jiffies,
1269                .strategy       = &sysctl_jiffies,
1270        },
1271        {
1272                .ctl_name       = VM_BLOCK_DUMP,
1273                .procname       = "block_dump",
1274                .data           = &block_dump,
1275                .maxlen         = sizeof(block_dump),
1276                .mode           = 0644,
1277                .proc_handler   = &proc_dointvec,
1278                .strategy       = &sysctl_intvec,
1279                .extra1         = &zero,
1280        },
1281        {
1282                .ctl_name       = VM_VFS_CACHE_PRESSURE,
1283                .procname       = "vfs_cache_pressure",
1284                .data           = &sysctl_vfs_cache_pressure,
1285                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1286                .mode           = 0644,
1287                .proc_handler   = &proc_dointvec,
1288                .strategy       = &sysctl_intvec,
1289                .extra1         = &zero,
1290        },
1291#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1292        {
1293                .ctl_name       = VM_LEGACY_VA_LAYOUT,
1294                .procname       = "legacy_va_layout",
1295                .data           = &sysctl_legacy_va_layout,
1296                .maxlen         = sizeof(sysctl_legacy_va_layout),
1297                .mode           = 0644,
1298                .proc_handler   = &proc_dointvec,
1299                .strategy       = &sysctl_intvec,
1300                .extra1         = &zero,
1301        },
1302#endif
1303#ifdef CONFIG_NUMA
1304        {
1305                .ctl_name       = VM_ZONE_RECLAIM_MODE,
1306                .procname       = "zone_reclaim_mode",
1307                .data           = &zone_reclaim_mode,
1308                .maxlen         = sizeof(zone_reclaim_mode),
1309                .mode           = 0644,
1310                .proc_handler   = &proc_dointvec,
1311                .strategy       = &sysctl_intvec,
1312                .extra1         = &zero,
1313        },
1314        {
1315                .ctl_name       = VM_MIN_UNMAPPED,
1316                .procname       = "min_unmapped_ratio",
1317                .data           = &sysctl_min_unmapped_ratio,
1318                .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1319                .mode           = 0644,
1320                .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
1321                .strategy       = &sysctl_intvec,
1322                .extra1         = &zero,
1323                .extra2         = &one_hundred,
1324        },
1325        {
1326                .ctl_name       = VM_MIN_SLAB,
1327                .procname       = "min_slab_ratio",
1328                .data           = &sysctl_min_slab_ratio,
1329                .maxlen         = sizeof(sysctl_min_slab_ratio),
1330                .mode           = 0644,
1331                .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
1332                .strategy       = &sysctl_intvec,
1333                .extra1         = &zero,
1334                .extra2         = &one_hundred,
1335        },
1336#endif
1337#ifdef CONFIG_SMP
1338        {
1339                .ctl_name       = CTL_UNNUMBERED,
1340                .procname       = "stat_interval",
1341                .data           = &sysctl_stat_interval,
1342                .maxlen         = sizeof(sysctl_stat_interval),
1343                .mode           = 0644,
1344                .proc_handler   = &proc_dointvec_jiffies,
1345                .strategy       = &sysctl_jiffies,
1346        },
1347#endif
1348        {
1349                .ctl_name       = CTL_UNNUMBERED,
1350                .procname       = "mmap_min_addr",
1351                .data           = &dac_mmap_min_addr,
1352                .maxlen         = sizeof(unsigned long),
1353                .mode           = 0644,
1354                .proc_handler   = &mmap_min_addr_handler,
1355        },
1356#ifdef CONFIG_NUMA
1357        {
1358                .ctl_name       = CTL_UNNUMBERED,
1359                .procname       = "numa_zonelist_order",
1360                .data           = &numa_zonelist_order,
1361                .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1362                .mode           = 0644,
1363                .proc_handler   = &numa_zonelist_order_handler,
1364                .strategy       = &sysctl_string,
1365        },
1366#endif
1367#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1368   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1369        {
1370                .ctl_name       = VM_VDSO_ENABLED,
1371                .procname       = "vdso_enabled",
1372                .data           = &vdso_enabled,
1373                .maxlen         = sizeof(vdso_enabled),
1374                .mode           = 0644,
1375                .proc_handler   = &proc_dointvec,
1376                .strategy       = &sysctl_intvec,
1377                .extra1         = &zero,
1378        },
1379#endif
1380#ifdef CONFIG_HIGHMEM
1381        {
1382                .ctl_name       = CTL_UNNUMBERED,
1383                .procname       = "highmem_is_dirtyable",
1384                .data           = &vm_highmem_is_dirtyable,
1385                .maxlen         = sizeof(vm_highmem_is_dirtyable),
1386                .mode           = 0644,
1387                .proc_handler   = &proc_dointvec_minmax,
1388                .strategy       = &sysctl_intvec,
1389                .extra1         = &zero,
1390                .extra2         = &one,
1391        },
1392#endif
1393        {
1394                .ctl_name       = CTL_UNNUMBERED,
1395                .procname       = "scan_unevictable_pages",
1396                .data           = &scan_unevictable_pages,
1397                .maxlen         = sizeof(scan_unevictable_pages),
1398                .mode           = 0644,
1399                .proc_handler   = &scan_unevictable_handler,
1400        },
1401#ifdef CONFIG_MEMORY_FAILURE
1402        {
1403                .ctl_name       = CTL_UNNUMBERED,
1404                .procname       = "memory_failure_early_kill",
1405                .data           = &sysctl_memory_failure_early_kill,
1406                .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1407                .mode           = 0644,
1408                .proc_handler   = &proc_dointvec_minmax,
1409                .strategy       = &sysctl_intvec,
1410                .extra1         = &zero,
1411                .extra2         = &one,
1412        },
1413        {
1414                .ctl_name       = CTL_UNNUMBERED,
1415                .procname       = "memory_failure_recovery",
1416                .data           = &sysctl_memory_failure_recovery,
1417                .maxlen         = sizeof(sysctl_memory_failure_recovery),
1418                .mode           = 0644,
1419                .proc_handler   = &proc_dointvec_minmax,
1420                .strategy       = &sysctl_intvec,
1421                .extra1         = &zero,
1422                .extra2         = &one,
1423        },
1424#endif
1425
1426/*
1427 * NOTE: do not add new entries to this table unless you have read
1428 * Documentation/sysctl/ctl_unnumbered.txt
1429 */
1430        { .ctl_name = 0 }
1431};
1432
1433#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1434static struct ctl_table binfmt_misc_table[] = {
1435        { .ctl_name = 0 }
1436};
1437#endif
1438
1439static struct ctl_table fs_table[] = {
1440        {
1441                .ctl_name       = FS_NRINODE,
1442                .procname       = "inode-nr",
1443                .data           = &inodes_stat,
1444                .maxlen         = 2*sizeof(int),
1445                .mode           = 0444,
1446                .proc_handler   = &proc_dointvec,
1447        },
1448        {
1449                .ctl_name       = FS_STATINODE,
1450                .procname       = "inode-state",
1451                .data           = &inodes_stat,
1452                .maxlen         = 7*sizeof(int),
1453                .mode           = 0444,
1454                .proc_handler   = &proc_dointvec,
1455        },
1456        {
1457                .procname       = "file-nr",
1458                .data           = &files_stat,
1459                .maxlen         = 3*sizeof(int),
1460                .mode           = 0444,
1461                .proc_handler   = &proc_nr_files,
1462        },
1463        {
1464                .ctl_name       = FS_MAXFILE,
1465                .procname       = "file-max",
1466                .data           = &files_stat.max_files,
1467                .maxlen         = sizeof(int),
1468                .mode           = 0644,
1469                .proc_handler   = &proc_dointvec,
1470        },
1471        {
1472                .ctl_name       = CTL_UNNUMBERED,
1473                .procname       = "nr_open",
1474                .data           = &sysctl_nr_open,
1475                .maxlen         = sizeof(int),
1476                .mode           = 0644,
1477                .proc_handler   = &proc_dointvec_minmax,
1478                .extra1         = &sysctl_nr_open_min,
1479                .extra2         = &sysctl_nr_open_max,
1480        },
1481        {
1482                .ctl_name       = FS_DENTRY,
1483                .procname       = "dentry-state",
1484                .data           = &dentry_stat,
1485                .maxlen         = 6*sizeof(int),
1486                .mode           = 0444,
1487                .proc_handler   = &proc_dointvec,
1488        },
1489        {
1490                .ctl_name       = FS_OVERFLOWUID,
1491                .procname       = "overflowuid",
1492                .data           = &fs_overflowuid,
1493                .maxlen         = sizeof(int),
1494                .mode           = 0644,
1495                .proc_handler   = &proc_dointvec_minmax,
1496                .strategy       = &sysctl_intvec,
1497                .extra1         = &minolduid,
1498                .extra2         = &maxolduid,
1499        },
1500        {
1501                .ctl_name       = FS_OVERFLOWGID,
1502                .procname       = "overflowgid",
1503                .data           = &fs_overflowgid,
1504                .maxlen         = sizeof(int),
1505                .mode           = 0644,
1506                .proc_handler   = &proc_dointvec_minmax,
1507                .strategy       = &sysctl_intvec,
1508                .extra1         = &minolduid,
1509                .extra2         = &maxolduid,
1510        },
1511#ifdef CONFIG_FILE_LOCKING
1512        {
1513                .ctl_name       = FS_LEASES,
1514                .procname       = "leases-enable",
1515                .data           = &leases_enable,
1516                .maxlen         = sizeof(int),
1517                .mode           = 0644,
1518                .proc_handler   = &proc_dointvec,
1519        },
1520#endif
1521#ifdef CONFIG_DNOTIFY
1522        {
1523                .ctl_name       = FS_DIR_NOTIFY,
1524                .procname       = "dir-notify-enable",
1525                .data           = &dir_notify_enable,
1526                .maxlen         = sizeof(int),
1527                .mode           = 0644,
1528                .proc_handler   = &proc_dointvec,
1529        },
1530#endif
1531#ifdef CONFIG_MMU
1532#ifdef CONFIG_FILE_LOCKING
1533        {
1534                .ctl_name       = FS_LEASE_TIME,
1535                .procname       = "lease-break-time",
1536                .data           = &lease_break_time,
1537                .maxlen         = sizeof(int),
1538                .mode           = 0644,
1539                .proc_handler   = &proc_dointvec,
1540        },
1541#endif
1542#ifdef CONFIG_AIO
1543        {
1544                .procname       = "aio-nr",
1545                .data           = &aio_nr,
1546                .maxlen         = sizeof(aio_nr),
1547                .mode           = 0444,
1548                .proc_handler   = &proc_doulongvec_minmax,
1549        },
1550        {
1551                .procname       = "aio-max-nr",
1552                .data           = &aio_max_nr,
1553                .maxlen         = sizeof(aio_max_nr),
1554                .mode           = 0644,
1555                .proc_handler   = &proc_doulongvec_minmax,
1556        },
1557#endif /* CONFIG_AIO */
1558#ifdef CONFIG_INOTIFY_USER
1559        {
1560                .ctl_name       = FS_INOTIFY,
1561                .procname       = "inotify",
1562                .mode           = 0555,
1563                .child          = inotify_table,
1564        },
1565#endif  
1566#ifdef CONFIG_EPOLL
1567        {
1568                .procname       = "epoll",
1569                .mode           = 0555,
1570                .child          = epoll_table,
1571        },
1572#endif
1573#endif
1574        {
1575                .ctl_name       = KERN_SETUID_DUMPABLE,
1576                .procname       = "suid_dumpable",
1577                .data           = &suid_dumpable,
1578                .maxlen         = sizeof(int),
1579                .mode           = 0644,
1580                .proc_handler   = &proc_dointvec_minmax,
1581                .strategy       = &sysctl_intvec,
1582                .extra1         = &zero,
1583                .extra2         = &two,
1584        },
1585#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1586        {
1587                .ctl_name       = CTL_UNNUMBERED,
1588                .procname       = "binfmt_misc",
1589                .mode           = 0555,
1590                .child          = binfmt_misc_table,
1591        },
1592#endif
1593/*
1594 * NOTE: do not add new entries to this table unless you have read
1595 * Documentation/sysctl/ctl_unnumbered.txt
1596 */
1597        { .ctl_name = 0 }
1598};
1599
1600static struct ctl_table debug_table[] = {
1601#if defined(CONFIG_X86) || defined(CONFIG_PPC)
1602        {
1603                .ctl_name       = CTL_UNNUMBERED,
1604                .procname       = "exception-trace",
1605                .data           = &show_unhandled_signals,
1606                .maxlen         = sizeof(int),
1607                .mode           = 0644,
1608                .proc_handler   = proc_dointvec
1609        },
1610#endif
1611        { .ctl_name = 0 }
1612};
1613
1614static struct ctl_table dev_table[] = {
1615        { .ctl_name = 0 }
1616};
1617
1618static DEFINE_SPINLOCK(sysctl_lock);
1619
1620/* called under sysctl_lock */
1621static int use_table(struct ctl_table_header *p)
1622{
1623        if (unlikely(p->unregistering))
1624                return 0;
1625        p->used++;
1626        return 1;
1627}
1628
1629/* called under sysctl_lock */
1630static void unuse_table(struct ctl_table_header *p)
1631{
1632        if (!--p->used)
1633                if (unlikely(p->unregistering))
1634                        complete(p->unregistering);
1635}
1636
1637/* called under sysctl_lock, will reacquire if has to wait */
1638static void start_unregistering(struct ctl_table_header *p)
1639{
1640        /*
1641         * if p->used is 0, nobody will ever touch that entry again;
1642         * we'll eliminate all paths to it before dropping sysctl_lock
1643         */
1644        if (unlikely(p->used)) {
1645                struct completion wait;
1646                init_completion(&wait);
1647                p->unregistering = &wait;
1648                spin_unlock(&sysctl_lock);
1649                wait_for_completion(&wait);
1650                spin_lock(&sysctl_lock);
1651        } else {
1652                /* anything non-NULL; we'll never dereference it */
1653                p->unregistering = ERR_PTR(-EINVAL);
1654        }
1655        /*
1656         * do not remove from the list until nobody holds it; walking the
1657         * list in do_sysctl() relies on that.
1658         */
1659        list_del_init(&p->ctl_entry);
1660}
1661
1662void sysctl_head_get(struct ctl_table_header *head)
1663{
1664        spin_lock(&sysctl_lock);
1665        head->count++;
1666        spin_unlock(&sysctl_lock);
1667}
1668
1669void sysctl_head_put(struct ctl_table_header *head)
1670{
1671        spin_lock(&sysctl_lock);
1672        if (!--head->count)
1673                kfree(head);
1674        spin_unlock(&sysctl_lock);
1675}
1676
1677struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1678{
1679        if (!head)
1680                BUG();
1681        spin_lock(&sysctl_lock);
1682        if (!use_table(head))
1683                head = ERR_PTR(-ENOENT);
1684        spin_unlock(&sysctl_lock);
1685        return head;
1686}
1687
1688void sysctl_head_finish(struct ctl_table_header *head)
1689{
1690        if (!head)
1691                return;
1692        spin_lock(&sysctl_lock);
1693        unuse_table(head);
1694        spin_unlock(&sysctl_lock);
1695}
1696
1697static struct ctl_table_set *
1698lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1699{
1700        struct ctl_table_set *set = &root->default_set;
1701        if (root->lookup)
1702                set = root->lookup(root, namespaces);
1703        return set;
1704}
1705
1706static struct list_head *
1707lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1708{
1709        struct ctl_table_set *set = lookup_header_set(root, namespaces);
1710        return &set->list;
1711}
1712
1713struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1714                                            struct ctl_table_header *prev)
1715{
1716        struct ctl_table_root *root;
1717        struct list_head *header_list;
1718        struct ctl_table_header *head;
1719        struct list_head *tmp;
1720
1721        spin_lock(&sysctl_lock);
1722        if (prev) {
1723                head = prev;
1724                tmp = &prev->ctl_entry;
1725                unuse_table(prev);
1726                goto next;
1727        }
1728        tmp = &root_table_header.ctl_entry;
1729        for (;;) {
1730                head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1731
1732                if (!use_table(head))
1733                        goto next;
1734                spin_unlock(&sysctl_lock);
1735                return head;
1736        next:
1737                root = head->root;
1738                tmp = tmp->next;
1739                header_list = lookup_header_list(root, namespaces);
1740                if (tmp != header_list)
1741                        continue;
1742
1743                do {
1744                        root = list_entry(root->root_list.next,
1745                                        struct ctl_table_root, root_list);
1746                        if (root == &sysctl_table_root)
1747                                goto out;
1748                        header_list = lookup_header_list(root, namespaces);
1749                } while (list_empty(header_list));
1750                tmp = header_list->next;
1751        }
1752out:
1753        spin_unlock(&sysctl_lock);
1754        return NULL;
1755}
1756
1757struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1758{
1759        return __sysctl_head_next(current->nsproxy, prev);
1760}
1761
1762void register_sysctl_root(struct ctl_table_root *root)
1763{
1764        spin_lock(&sysctl_lock);
1765        list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1766        spin_unlock(&sysctl_lock);
1767}
1768
1769#ifdef CONFIG_SYSCTL_SYSCALL
1770/* Perform the actual read/write of a sysctl table entry. */
1771static int do_sysctl_strategy(struct ctl_table_root *root,
1772                        struct ctl_table *table,
1773                        void __user *oldval, size_t __user *oldlenp,
1774                        void __user *newval, size_t newlen)
1775{
1776        int op = 0, rc;
1777
1778        if (oldval)
1779                op |= MAY_READ;
1780        if (newval)
1781                op |= MAY_WRITE;
1782        if (sysctl_perm(root, table, op))
1783                return -EPERM;
1784
1785        if (table->strategy) {
1786                rc = table->strategy(table, oldval, oldlenp, newval, newlen);
1787                if (rc < 0)
1788                        return rc;
1789                if (rc > 0)
1790                        return 0;
1791        }
1792
1793        /* If there is no strategy routine, or if the strategy returns
1794         * zero, proceed with automatic r/w */
1795        if (table->data && table->maxlen) {
1796                rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
1797                if (rc < 0)
1798                        return rc;
1799        }
1800        return 0;
1801}
1802
1803static int parse_table(int __user *name, int nlen,
1804                       void __user *oldval, size_t __user *oldlenp,
1805                       void __user *newval, size_t newlen,
1806                       struct ctl_table_root *root,
1807                       struct ctl_table *table)
1808{
1809        int n;
1810repeat:
1811        if (!nlen)
1812                return -ENOTDIR;
1813        if (get_user(n, name))
1814                return -EFAULT;
1815        for ( ; table->ctl_name || table->procname; table++) {
1816                if (!table->ctl_name)
1817                        continue;
1818                if (n == table->ctl_name) {
1819                        int error;
1820                        if (table->child) {
1821                                if (sysctl_perm(root, table, MAY_EXEC))
1822                                        return -EPERM;
1823                                name++;
1824                                nlen--;
1825                                table = table->child;
1826                                goto repeat;
1827                        }
1828                        error = do_sysctl_strategy(root, table,
1829                                                   oldval, oldlenp,
1830                                                   newval, newlen);
1831                        return error;
1832                }
1833        }
1834        return -ENOTDIR;
1835}
1836
1837int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1838               void __user *newval, size_t newlen)
1839{
1840        struct ctl_table_header *head;
1841        int error = -ENOTDIR;
1842
1843        if (nlen <= 0 || nlen >= CTL_MAXNAME)
1844                return -ENOTDIR;
1845        if (oldval) {
1846                int old_len;
1847                if (!oldlenp || get_user(old_len, oldlenp))
1848                        return -EFAULT;
1849        }
1850
1851        for (head = sysctl_head_next(NULL); head;
1852                        head = sysctl_head_next(head)) {
1853                error = parse_table(name, nlen, oldval, oldlenp, 
1854                                        newval, newlen,
1855                                        head->root, head->ctl_table);
1856                if (error != -ENOTDIR) {
1857                        sysctl_head_finish(head);
1858                        break;
1859                }
1860        }
1861        return error;
1862}
1863
1864SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1865{
1866        struct __sysctl_args tmp;
1867        int error;
1868
1869        if (copy_from_user(&tmp, args, sizeof(tmp)))
1870                return -EFAULT;
1871
1872        error = deprecated_sysctl_warning(&tmp);
1873        if (error)
1874                goto out;
1875
1876        lock_kernel();
1877        error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1878                          tmp.newval, tmp.newlen);
1879        unlock_kernel();
1880out:
1881        return error;
1882}
1883#endif /* CONFIG_SYSCTL_SYSCALL */
1884
1885/*
1886 * sysctl_perm does NOT grant the superuser all rights automatically, because
1887 * some sysctl variables are readonly even to root.
1888 */
1889
1890static int test_perm(int mode, int op)
1891{
1892        if (!current_euid())
1893                mode >>= 6;
1894        else if (in_egroup_p(0))
1895                mode >>= 3;
1896        if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1897                return 0;
1898        return -EACCES;
1899}
1900
1901int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1902{
1903        int error;
1904        int mode;
1905
1906        error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1907        if (error)
1908                return error;
1909
1910        if (root->permissions)
1911                mode = root->permissions(root, current->nsproxy, table);
1912        else
1913                mode = table->mode;
1914
1915        return test_perm(mode, op);
1916}
1917
1918static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1919{
1920        for (; table->ctl_name || table->procname; table++) {
1921                table->parent = parent;
1922                if (table->child)
1923                        sysctl_set_parent(table, table->child);
1924        }
1925}
1926
1927static __init int sysctl_init(void)
1928{
1929        sysctl_set_parent(NULL, root_table);
1930#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1931        {
1932                int err;
1933                err = sysctl_check_table(current->nsproxy, root_table);
1934        }
1935#endif
1936        return 0;
1937}
1938
1939core_initcall(sysctl_init);
1940
1941static struct ctl_table *is_branch_in(struct ctl_table *branch,
1942                                      struct ctl_table *table)
1943{
1944        struct ctl_table *p;
1945        const char *s = branch->procname;
1946
1947        /* branch should have named subdirectory as its first element */
1948        if (!s || !branch->child)
1949                return NULL;
1950
1951        /* ... and nothing else */
1952        if (branch[1].procname || branch[1].ctl_name)
1953                return NULL;
1954
1955        /* table should contain subdirectory with the same name */
1956        for (p = table; p->procname || p->ctl_name; p++) {
1957                if (!p->child)
1958                        continue;
1959                if (p->procname && strcmp(p->procname, s) == 0)
1960                        return p;
1961        }
1962        return NULL;
1963}
1964
1965/* see if attaching q to p would be an improvement */
1966static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1967{
1968        struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1969        struct ctl_table *next;
1970        int is_better = 0;
1971        int not_in_parent = !p->attached_by;
1972
1973        while ((next = is_branch_in(by, to)) != NULL) {
1974                if (by == q->attached_by)
1975                        is_better = 1;
1976                if (to == p->attached_by)
1977                        not_in_parent = 1;
1978                by = by->child;
1979                to = next->child;
1980        }
1981
1982        if (is_better && not_in_parent) {
1983                q->attached_by = by;
1984                q->attached_to = to;
1985                q->parent = p;
1986        }
1987}
1988
1989/**
1990 * __register_sysctl_paths - register a sysctl hierarchy
1991 * @root: List of sysctl headers to register on
1992 * @namespaces: Data to compute which lists of sysctl entries are visible
1993 * @path: The path to the directory the sysctl table is in.
1994 * @table: the top-level table structure
1995 *
1996 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1997 * array. A completely 0 filled entry terminates the table.
1998 *
1999 * The members of the &struct ctl_table structure are used as follows:
2000 *
2001 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
2002 *            must be unique within that level of sysctl
2003 *
2004 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
2005 *            enter a sysctl file
2006 *
2007 * data - a pointer to data for use by proc_handler
2008 *
2009 * maxlen - the maximum size in bytes of the data
2010 *
2011 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
2012 *
2013 * child - a pointer to the child sysctl table if this entry is a directory, or
2014 *         %NULL.
2015 *
2016 * proc_handler - the text handler routine (described below)
2017 *
2018 * strategy - the strategy routine (described below)
2019 *
2020 * de - for internal use by the sysctl routines
2021 *
2022 * extra1, extra2 - extra pointers usable by the proc handler routines
2023 *
2024 * Leaf nodes in the sysctl tree will be represented by a single file
2025 * under /proc; non-leaf nodes will be represented by directories.
2026 *
2027 * sysctl(2) can automatically manage read and write requests through
2028 * the sysctl table.  The data and maxlen fields of the ctl_table
2029 * struct enable minimal validation of the values being written to be
2030 * performed, and the mode field allows minimal authentication.
2031 *
2032 * More sophisticated management can be enabled by the provision of a
2033 * strategy routine with the table entry.  This will be called before
2034 * any automatic read or write of the data is performed.
2035 *
2036 * The strategy routine may return
2037 *
2038 * < 0 - Error occurred (error is passed to user process)
2039 *
2040 * 0   - OK - proceed with automatic read or write.
2041 *
2042 * > 0 - OK - read or write has been done by the strategy routine, so
2043 *       return immediately.
2044 *
2045 * There must be a proc_handler routine for any terminal nodes
2046 * mirrored under /proc/sys (non-terminals are handled by a built-in
2047 * directory handler).  Several default handlers are available to
2048 * cover common cases -
2049 *
2050 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
2051 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
2052 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
2053 *
2054 * It is the handler's job to read the input buffer from user memory
2055 * and process it. The handler should return 0 on success.
2056 *
2057 * This routine returns %NULL on a failure to register, and a pointer
2058 * to the table header on success.
2059 */
2060struct ctl_table_header *__register_sysctl_paths(
2061        struct ctl_table_root *root,
2062        struct nsproxy *namespaces,
2063        const struct ctl_path *path, struct ctl_table *table)
2064{
2065        struct ctl_table_header *header;
2066        struct ctl_table *new, **prevp;
2067        unsigned int n, npath;
2068        struct ctl_table_set *set;
2069
2070        /* Count the path components */
2071        for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
2072                ;
2073
2074        /*
2075         * For each path component, allocate a 2-element ctl_table array.
2076         * The first array element will be filled with the sysctl entry
2077         * for this, the second will be the sentinel (ctl_name == 0).
2078         *
2079         * We allocate everything in one go so that we don't have to
2080         * worry about freeing additional memory in unregister_sysctl_table.
2081         */
2082        header = kzalloc(sizeof(struct ctl_table_header) +
2083                         (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
2084        if (!header)
2085                return NULL;
2086
2087        new = (struct ctl_table *) (header + 1);
2088
2089        /* Now connect the dots */
2090        prevp = &header->ctl_table;
2091        for (n = 0; n < npath; ++n, ++path) {
2092                /* Copy the procname */
2093                new->procname = path->procname;
2094                new->ctl_name = path->ctl_name;
2095                new->mode     = 0555;
2096
2097                *prevp = new;
2098                prevp = &new->child;
2099
2100                new += 2;
2101        }
2102        *prevp = table;
2103        header->ctl_table_arg = table;
2104
2105        INIT_LIST_HEAD(&header->ctl_entry);
2106        header->used = 0;
2107        header->unregistering = NULL;
2108        header->root = root;
2109        sysctl_set_parent(NULL, header->ctl_table);
2110        header->count = 1;
2111#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
2112        if (sysctl_check_table(namespaces, header->ctl_table)) {
2113                kfree(header);
2114                return NULL;
2115        }
2116#endif
2117        spin_lock(&sysctl_lock);
2118        header->set = lookup_header_set(root, namespaces);
2119        header->attached_by = header->ctl_table;
2120        header->attached_to = root_table;
2121        header->parent = &root_table_header;
2122        for (set = header->set; set; set = set->parent) {
2123                struct ctl_table_header *p;
2124                list_for_each_entry(p, &set->list, ctl_entry) {
2125                        if (p->unregistering)
2126                                continue;
2127                        try_attach(p, header);
2128                }
2129        }
2130        header->parent->count++;
2131        list_add_tail(&header->ctl_entry, &header->set->list);
2132        spin_unlock(&sysctl_lock);
2133
2134        return header;
2135}
2136
2137/**
2138 * register_sysctl_table_path - register a sysctl table hierarchy
2139 * @path: The path to the directory the sysctl table is in.
2140 * @table: the top-level table structure
2141 *
2142 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2143 * array. A completely 0 filled entry terminates the table.
2144 *
2145 * See __register_sysctl_paths for more details.
2146 */
2147struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2148                                                struct ctl_table *table)
2149{
2150        return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
2151                                        path, table);
2152}
2153
2154/**
2155 * register_sysctl_table - register a sysctl table hierarchy
2156 * @table: the top-level table structure
2157 *
2158 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2159 * array. A completely 0 filled entry terminates the table.
2160 *
2161 * See register_sysctl_paths for more details.
2162 */
2163struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
2164{
2165        static const struct ctl_path null_path[] = { {} };
2166
2167        return register_sysctl_paths(null_path, table);
2168}
2169
2170/**
2171 * unregister_sysctl_table - unregister a sysctl table hierarchy
2172 * @header: the header returned from register_sysctl_table
2173 *
2174 * Unregisters the sysctl table and all children. proc entries may not
2175 * actually be removed until they are no longer used by anyone.
2176 */
2177void unregister_sysctl_table(struct ctl_table_header * header)
2178{
2179        might_sleep();
2180
2181        if (header == NULL)
2182                return;
2183
2184        spin_lock(&sysctl_lock);
2185        start_unregistering(header);
2186        if (!--header->parent->count) {
2187                WARN_ON(1);
2188                kfree(header->parent);
2189        }
2190        if (!--header->count)
2191                kfree(header);
2192        spin_unlock(&sysctl_lock);
2193}
2194
2195int sysctl_is_seen(struct ctl_table_header *p)
2196{
2197        struct ctl_table_set *set = p->set;
2198        int res;
2199        spin_lock(&sysctl_lock);
2200        if (p->unregistering)
2201                res = 0;
2202        else if (!set->is_seen)
2203                res = 1;
2204        else
2205                res = set->is_seen(set);
2206        spin_unlock(&sysctl_lock);
2207        return res;
2208}
2209
2210void setup_sysctl_set(struct ctl_table_set *p,
2211        struct ctl_table_set *parent,
2212        int (*is_seen)(struct ctl_table_set *))
2213{
2214        INIT_LIST_HEAD(&p->list);
2215        p->parent = parent ? parent : &sysctl_table_root.default_set;
2216        p->is_seen = is_seen;
2217}
2218
2219#else /* !CONFIG_SYSCTL */
/*
 * Stubs for kernels built without CONFIG_SYSCTL: registration always
 * yields NULL and the remaining entry points do nothing.
 */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
        return NULL;
}

struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
                                               struct ctl_table *table)
{
        return NULL;
}

void unregister_sysctl_table(struct ctl_table_header *table)
{
        /* nothing was ever registered */
}

void setup_sysctl_set(struct ctl_table_set *p,
                      struct ctl_table_set *parent,
                      int (*is_seen)(struct ctl_table_set *))
{
        /* no table sets without sysctl support */
}

void sysctl_head_put(struct ctl_table_header *head)
{
        /* no headers to release */
}
2244
2245#endif /* CONFIG_SYSCTL */
2246
2247/*
2248 * /proc/sys support
2249 */
2250
2251#ifdef CONFIG_PROC_SYSCTL
2252
2253static int _proc_do_string(void* data, int maxlen, int write,
2254                           void __user *buffer,
2255                           size_t *lenp, loff_t *ppos)
2256{
2257        size_t len;
2258        char __user *p;
2259        char c;
2260
2261        if (!data || !maxlen || !*lenp) {
2262                *lenp = 0;
2263                return 0;
2264        }
2265
2266        if (write) {
2267                len = 0;
2268                p = buffer;
2269                while (len < *lenp) {
2270                        if (get_user(c, p++))
2271                                return -EFAULT;
2272                        if (c == 0 || c == '\n')
2273                                break;
2274                        len++;
2275                }
2276                if (len >= maxlen)
2277                        len = maxlen-1;
2278                if(copy_from_user(data, buffer, len))
2279                        return -EFAULT;
2280                ((char *) data)[len] = 0;
2281                *ppos += *lenp;
2282        } else {
2283                len = strlen(data);
2284                if (len > maxlen)
2285                        len = maxlen;
2286
2287                if (*ppos > len) {
2288                        *lenp = 0;
2289                        return 0;
2290                }
2291
2292                data += *ppos;
2293                len  -= *ppos;
2294
2295                if (len > *lenp)
2296                        len = *lenp;
2297                if (len)
2298                        if(copy_to_user(buffer, data, len))
2299                                return -EFAULT;
2300                if (len < *lenp) {
2301                        if(put_user('\n', ((char __user *) buffer) + len))
2302                                return -EFAULT;
2303                        len++;
2304                }
2305                *lenp = len;
2306                *ppos += len;
2307        }
2308        return 0;
2309}
2310
2311/**
2312 * proc_dostring - read a string sysctl
2313 * @table: the sysctl table
2314 * @write: %TRUE if this is a write to the sysctl file
2315 * @buffer: the user buffer
2316 * @lenp: the size of the user buffer
2317 * @ppos: file position
2318 *
2319 * Reads/writes a string from/to the user buffer. If the kernel
2320 * buffer provided is not large enough to hold the string, the
2321 * string is truncated. The copied string is %NULL-terminated.
2322 * If the string is being read by the user process, it is copied
2323 * and a newline '\n' is added. It is truncated if the buffer is
2324 * not large enough.
2325 *
2326 * Returns 0 on success.
2327 */
2328int proc_dostring(struct ctl_table *table, int write,
2329                  void __user *buffer, size_t *lenp, loff_t *ppos)
2330{
2331        return _proc_do_string(table->data, table->maxlen, write,
2332                               buffer, lenp, ppos);
2333}
2334
2335
/*
 * Default converter between the int stored in a sysctl variable and the
 * (sign, magnitude) pair used by the text parser and formatter.
 *
 * @negp:  sign flag; input when @write is set, output otherwise
 * @lvalp: magnitude; input when @write is set, output otherwise
 * @valp:  the kernel integer being stored to or loaded from
 * @write: non-zero to convert negp/lvalp into *valp, zero for the reverse
 * @data:  not used by this converter
 *
 * Returns 0 on success, or -EINVAL when the magnitude to be written does
 * not fit in an int; *valp is left untouched in that case instead of
 * receiving a silently truncated value.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (write) {
                if (*negp) {
                        /* the most negative int has magnitude INT_MAX + 1 */
                        if (*lvalp > (unsigned long) INT_MAX + 1)
                                return -EINVAL;
                        *valp = -*lvalp;
                } else {
                        if (*lvalp > (unsigned long) INT_MAX)
                                return -EINVAL;
                        *valp = *lvalp;
                }
        } else {
                int val = *valp;
                if (val < 0) {
                        *negp = -1;
                        /*
                         * Negate in unsigned arithmetic: "-val" overflows
                         * (undefined behaviour) when val is INT_MIN.
                         */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2354
2355static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2356                  int write, void __user *buffer,
2357                  size_t *lenp, loff_t *ppos,
2358                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2359                              int write, void *data),
2360                  void *data)
2361{
2362#define TMPBUFLEN 21
2363        int *i, vleft, first = 1, neg;
2364        unsigned long lval;
2365        size_t left, len;
2366        
2367        char buf[TMPBUFLEN], *p;
2368        char __user *s = buffer;
2369        
2370        if (!tbl_data || !table->maxlen || !*lenp ||
2371            (*ppos && !write)) {
2372                *lenp = 0;
2373                return 0;
2374        }
2375        
2376        i = (int *) tbl_data;
2377        vleft = table->maxlen / sizeof(*i);
2378        left = *lenp;
2379
2380        if (!conv)
2381                conv = do_proc_dointvec_conv;
2382
2383        for (; left && vleft--; i++, first=0) {
2384                if (write) {
2385                        while (left) {
2386                                char c;
2387                                if (get_user(c, s))
2388                                        return -EFAULT;
2389                                if (!isspace(c))
2390                                        break;
2391                                left--;
2392                                s++;
2393                        }
2394                        if (!left)
2395                                break;
2396                        neg = 0;
2397                        len = left;
2398                        if (len > sizeof(buf) - 1)
2399                                len = sizeof(buf) - 1;
2400                        if (copy_from_user(buf, s, len))
2401                                return -EFAULT;
2402                        buf[len] = 0;
2403                        p = buf;
2404                        if (*p == '-' && left > 1) {
2405                                neg = 1;
2406                                p++;
2407                        }
2408                        if (*p < '0' || *p > '9')
2409                                break;
2410
2411                        lval = simple_strtoul(p, &p, 0);
2412
2413                        len = p-buf;
2414                        if ((len < left) && *p && !isspace(*p))
2415                                break;
2416                        s += len;
2417                        left -= len;
2418
2419                        if (conv(&neg, &lval, i, 1, data))
2420                                break;
2421                } else {
2422                        p = buf;
2423                        if (!first)
2424                                *p++ = '\t';
2425        
2426                        if (conv(&neg, &lval, i, 0, data))
2427                                break;
2428
2429                        sprintf(p, "%s%lu", neg ? "-" : "", lval);
2430                        len = strlen(buf);
2431                        if (len > left)
2432                                len = left;
2433                        if(copy_to_user(s, buf, len))
2434                                return -EFAULT;
2435                        left -= len;
2436                        s += len;
2437                }
2438        }
2439
2440        if (!write && !first && left) {
2441                if(put_user('\n', s))
2442                        return -EFAULT;
2443                left--, s++;
2444        }
2445        if (write) {
2446                while (left) {
2447                        char c;
2448                        if (get_user(c, s++))
2449                                return -EFAULT;
2450                        if (!isspace(c))
2451                                break;
2452                        left--;
2453                }
2454        }
2455        if (write && first)
2456                return -EINVAL;
2457        *lenp -= left;
2458        *ppos += *lenp;
2459        return 0;
2460#undef TMPBUFLEN
2461}
2462
2463static int do_proc_dointvec(struct ctl_table *table, int write,
2464                  void __user *buffer, size_t *lenp, loff_t *ppos,
2465                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2466                              int write, void *data),
2467                  void *data)
2468{
2469        return __do_proc_dointvec(table->data, table, write,
2470                        buffer, lenp, ppos, conv, data);
2471}
2472
2473/**
2474 * proc_dointvec - read a vector of integers
2475 * @table: the sysctl table
2476 * @write: %TRUE if this is a write to the sysctl file
2477 * @buffer: the user buffer
2478 * @lenp: the size of the user buffer
2479 * @ppos: file position
2480 *
2481 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2482 * values from/to the user buffer, treated as an ASCII string. 
2483 *
2484 * Returns 0 on success.
2485 */
2486int proc_dointvec(struct ctl_table *table, int write,
2487                     void __user *buffer, size_t *lenp, loff_t *ppos)
2488{
2489    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2490                            NULL,NULL);
2491}
2492
2493/*
2494 * Taint values can only be increased
2495 * This means we can safely use a temporary.
2496 */
2497static int proc_taint(struct ctl_table *table, int write,
2498                               void __user *buffer, size_t *lenp, loff_t *ppos)
2499{
2500        struct ctl_table t;
2501        unsigned long tmptaint = get_taint();
2502        int err;
2503
2504        if (write && !capable(CAP_SYS_ADMIN))
2505                return -EPERM;
2506
2507        t = *table;
2508        t.data = &tmptaint;
2509        err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2510        if (err < 0)
2511                return err;
2512
2513        if (write) {
2514                /*
2515                 * Poor man's atomic or. Not worth adding a primitive
2516                 * to everyone's atomic.h for this
2517                 */
2518                int i;
2519                for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2520                        if ((tmptaint >> i) & 1)
2521                                add_taint(i);
2522                }
2523        }
2524
2525        return err;
2526}
2527
/*
 * Optional bounds handed to do_proc_dointvec_minmax_conv() through its
 * opaque @data argument. A NULL pointer means "no bound on that side".
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Conversion callback for range-checked integer sysctls.
 *
 * @negp:  sign flag (non-zero means negative); input on write, output on read
 * @lvalp: magnitude; input on write, output on read
 * @valp:  the kernel integer being stored to (write) or loaded from (read)
 * @write: non-zero to combine sign/magnitude into *valp, zero to split *valp
 * @data:  struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * Returns 0 on success. On write, returns -EINVAL when the value does not
 * fit in an int or lies outside the configured bounds; *valp is left
 * untouched in that case.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, 
                                        int *valp, 
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;

        if (write) {
                int val;

                /*
                 * Reject magnitudes that cannot be represented as an int
                 * before narrowing; otherwise a huge value would wrap and
                 * could slip through the min/max comparison below.
                 */
                if (*negp) {
                        if (*lvalp > (unsigned long)INT_MAX + 1)
                                return -EINVAL;
                        /* Built so that INT_MIN is reached without overflow. */
                        val = *lvalp ? -(int)(*lvalp - 1) - 1 : 0;
                } else {
                        if (*lvalp > (unsigned long)INT_MAX)
                                return -EINVAL;
                        val = (int)*lvalp;
                }

                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;

                if (val < 0) {
                        *negp = -1;
                        /* Negate in unsigned arithmetic so INT_MIN is safe. */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = 0;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2556
2557/**
2558 * proc_dointvec_minmax - read a vector of integers with min/max values
2559 * @table: the sysctl table
2560 * @write: %TRUE if this is a write to the sysctl file
2561 * @buffer: the user buffer
2562 * @lenp: the size of the user buffer
2563 * @ppos: file position
2564 *
2565 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2566 * values from/to the user buffer, treated as an ASCII string.
2567 *
2568 * This routine will ensure the values are within the range specified by
2569 * table->extra1 (min) and table->extra2 (max).
2570 *
2571 * Returns 0 on success.
2572 */
2573int proc_dointvec_minmax(struct ctl_table *table, int write,
2574                  void __user *buffer, size_t *lenp, loff_t *ppos)
2575{
2576        struct do_proc_dointvec_minmax_conv_param param = {
2577                .min = (int *) table->extra1,
2578                .max = (int *) table->extra2,
2579        };
2580        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2581                                do_proc_dointvec_minmax_conv, &param);
2582}
2583
2584static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2585                                     void __user *buffer,
2586                                     size_t *lenp, loff_t *ppos,
2587                                     unsigned long convmul,
2588                                     unsigned long convdiv)
2589{
2590#define TMPBUFLEN 21
2591        unsigned long *i, *min, *max, val;
2592        int vleft, first=1, neg;
2593        size_t len, left;
2594        char buf[TMPBUFLEN], *p;
2595        char __user *s = buffer;
2596        
2597        if (!data || !table->maxlen || !*lenp ||
2598            (*ppos && !write)) {
2599                *lenp = 0;
2600                return 0;
2601        }
2602        
2603        i = (unsigned long *) data;
2604        min = (unsigned long *) table->extra1;
2605        max = (unsigned long *) table->extra2;
2606        vleft = table->maxlen / sizeof(unsigned long);
2607        left = *lenp;
2608        
2609        for (; left && vleft--; i++, min++, max++, first=0) {
2610                if (write) {
2611                        while (left) {
2612                                char c;
2613                                if (get_user(c, s))
2614                                        return -EFAULT;
2615                                if (!isspace(c))
2616                                        break;
2617                                left--;
2618                                s++;
2619                        }
2620                        if (!left)
2621                                break;
2622                        neg = 0;
2623                        len = left;
2624                        if (len > TMPBUFLEN-1)
2625                                len = TMPBUFLEN-1;
2626                        if (copy_from_user(buf, s, len))
2627                                return -EFAULT;
2628                        buf[len] = 0;
2629                        p = buf;
2630                        if (*p == '-' && left > 1) {
2631                                neg = 1;
2632                                p++;
2633                        }
2634                        if (*p < '0' || *p > '9')
2635                                break;
2636                        val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2637                        len = p-buf;
2638                        if ((len < left) && *p && !isspace(*p))
2639                                break;
2640                        if (neg)
2641                                val = -val;
2642                        s += len;
2643                        left -= len;
2644
2645                        if(neg)
2646                                continue;
2647                        if ((min && val < *min) || (max && val > *max))
2648                                continue;
2649                        *i = val;
2650                } else {
2651                        p = buf;
2652                        if (!first)
2653                                *p++ = '\t';
2654                        sprintf(p, "%lu", convdiv * (*i) / convmul);
2655                        len = strlen(buf);
2656                        if (len > left)
2657                                len = left;
2658                        if(copy_to_user(s, buf, len))
2659                                return -EFAULT;
2660                        left -= len;
2661                        s += len;
2662                }
2663        }
2664
2665        if (!write && !first && left) {
2666                if(put_user('\n', s))
2667                        return -EFAULT;
2668                left--, s++;
2669        }
2670        if (write) {
2671                while (left) {
2672                        char c;
2673                        if (get_user(c, s++))
2674                                return -EFAULT;
2675                        if (!isspace(c))
2676                                break;
2677                        left--;
2678                }
2679        }
2680        if (write && first)
2681                return -EINVAL;
2682        *lenp -= left;
2683        *ppos += *lenp;
2684        return 0;
2685#undef TMPBUFLEN
2686}
2687
2688static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2689                                     void __user *buffer,
2690                                     size_t *lenp, loff_t *ppos,
2691                                     unsigned long convmul,
2692                                     unsigned long convdiv)
2693{
2694        return __do_proc_doulongvec_minmax(table->data, table, write,
2695                        buffer, lenp, ppos, convmul, convdiv);
2696}
2697
2698/**
2699 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2700 * @table: the sysctl table
2701 * @write: %TRUE if this is a write to the sysctl file
2702 * @buffer: the user buffer
2703 * @lenp: the size of the user buffer
2704 * @ppos: file position
2705 *
2706 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2707 * values from/to the user buffer, treated as an ASCII string.
2708 *
2709 * This routine will ensure the values are within the range specified by
2710 * table->extra1 (min) and table->extra2 (max).
2711 *
2712 * Returns 0 on success.
2713 */
2714int proc_doulongvec_minmax(struct ctl_table *table, int write,
2715                           void __user *buffer, size_t *lenp, loff_t *ppos)
2716{
2717    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2718}
2719
2720/**
2721 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2722 * @table: the sysctl table
2723 * @write: %TRUE if this is a write to the sysctl file
2724 * @buffer: the user buffer
2725 * @lenp: the size of the user buffer
2726 * @ppos: file position
2727 *
2728 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2729 * values from/to the user buffer, treated as an ASCII string. The values
2730 * are treated as milliseconds, and converted to jiffies when they are stored.
2731 *
2732 * This routine will ensure the values are within the range specified by
2733 * table->extra1 (min) and table->extra2 (max).
2734 *
2735 * Returns 0 on success.
2736 */
2737int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2738                                      void __user *buffer,
2739                                      size_t *lenp, loff_t *ppos)
2740{
2741    return do_proc_doulongvec_minmax(table, write, buffer,
2742                                     lenp, ppos, HZ, 1000l);
2743}
2744
2745
2746static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2747                                         int *valp,
2748                                         int write, void *data)
2749{
2750        if (write) {
2751                if (*lvalp > LONG_MAX / HZ)
2752                        return 1;
2753                *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2754        } else {
2755                int val = *valp;
2756                unsigned long lval;
2757                if (val < 0) {
2758                        *negp = -1;
2759                        lval = (unsigned long)-val;
2760                } else {
2761                        *negp = 0;
2762                        lval = (unsigned long)val;
2763                }
2764                *lvalp = lval / HZ;
2765        }
2766        return 0;
2767}
2768
2769static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2770                                                int *valp,
2771                                                int write, void *data)
2772{
2773        if (write) {
2774                if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2775                        return 1;
2776                *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2777        } else {
2778                int val = *valp;
2779                unsigned long lval;
2780                if (val < 0) {
2781                        *negp = -1;
2782                        lval = (unsigned long)-val;
2783                } else {
2784                        *negp = 0;
2785                        lval = (unsigned long)val;
2786                }
2787                *lvalp = jiffies_to_clock_t(lval);
2788        }
2789        return 0;
2790}
2791
/*
 * Conversion callback between milliseconds (user side) and jiffies (kernel
 * side), using msecs_to_jiffies() and jiffies_to_msecs().
 *
 * @negp:  sign flag (non-zero means negative); input on write, output on read
 * @lvalp: magnitude in milliseconds; input on write, output on read
 * @valp:  the kernel integer holding jiffies
 * @write: non-zero to store, zero to report
 * @data:  unused
 *
 * Returns 0 on success, or 1 when the converted value does not fit in the
 * int behind @valp.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

                /* Do not silently truncate a result wider than an int. */
                if (jif > INT_MAX)
                        return 1;
                *valp = (int)jif;
        } else {
                int val = *valp;
                unsigned long lval;

                if (val < 0) {
                        *negp = -1;
                        /* Negate in unsigned arithmetic so INT_MIN is safe. */
                        lval = -(unsigned long)val;
                } else {
                        *negp = 0;
                        lval = (unsigned long)val;
                }
                *lvalp = jiffies_to_msecs(lval);
        }
        return 0;
}
2812
2813/**
2814 * proc_dointvec_jiffies - read a vector of integers as seconds
2815 * @table: the sysctl table
2816 * @write: %TRUE if this is a write to the sysctl file
2817 * @buffer: the user buffer
2818 * @lenp: the size of the user buffer
2819 * @ppos: file position
2820 *
2821 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2822 * values from/to the user buffer, treated as an ASCII string. 
2823 * The values read are assumed to be in seconds, and are converted into
2824 * jiffies.
2825 *
2826 * Returns 0 on success.
2827 */
2828int proc_dointvec_jiffies(struct ctl_table *table, int write,
2829                          void __user *buffer, size_t *lenp, loff_t *ppos)
2830{
2831    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2832                            do_proc_dointvec_jiffies_conv,NULL);
2833}
2834
2835/**
2836 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2837 * @table: the sysctl table
2838 * @write: %TRUE if this is a write to the sysctl file
2839 * @buffer: the user buffer
2840 * @lenp: the size of the user buffer
2841 * @ppos: pointer to the file position
2842 *
2843 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2844 * values from/to the user buffer, treated as an ASCII string. 
2845 * The values read are assumed to be in 1/USER_HZ seconds, and 
2846 * are converted into jiffies.
2847 *
2848 * Returns 0 on success.
2849 */
2850int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2851                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2852{
2853    return do_proc_dointvec(table,write,buffer,lenp,ppos,
2854                            do_proc_dointvec_userhz_jiffies_conv,NULL);
2855}
2856
2857/**
2858 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2859 * @table: the sysctl table
2860 * @write: %TRUE if this is a write to the sysctl file
2861 * @buffer: the user buffer
2862 * @lenp: the size of the user buffer
2863 * @ppos: file position
2864 * @ppos: the current position in the file
2865 *
2866 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2867 * values from/to the user buffer, treated as an ASCII string. 
2868 * The values read are assumed to be in 1/1000 seconds, and 
2869 * are converted into jiffies.
2870 *
2871 * Returns 0 on success.
2872 */
2873int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2874                             void __user *buffer, size_t *lenp, loff_t *ppos)
2875{
2876        return do_proc_dointvec(table, write, buffer, lenp, ppos,
2877                                do_proc_dointvec_ms_jiffies_conv, NULL);
2878}
2879
2880static int proc_do_cad_pid(struct ctl_table *table, int write,
2881                           void __user *buffer, size_t *lenp, loff_t *ppos)
2882{
2883        struct pid *new_pid;
2884        pid_t tmp;
2885        int r;
2886
2887        tmp = pid_vnr(cad_pid);
2888
2889        r = __do_proc_dointvec(&tmp, table, write, buffer,
2890                               lenp, ppos, NULL, NULL);
2891        if (r || !write)
2892                return r;
2893
2894        new_pid = find_get_pid(tmp);
2895        if (!new_pid)
2896                return -ESRCH;
2897
2898        put_pid(xchg(&cad_pid, new_pid));
2899        return 0;
2900}
2901
2902#else /* CONFIG_PROC_FS */
2903
2904int proc_dostring(struct ctl_table *table, int write,
2905                  void __user *buffer, size_t *lenp, loff_t *ppos)
2906{
2907        return -ENOSYS;
2908}
2909
2910int proc_dointvec(struct ctl_table *table, int write,
2911                  void __user *buffer, size_t *lenp, loff_t *ppos)
2912{
2913        return -ENOSYS;
2914}
2915
2916int proc_dointvec_minmax(struct ctl_table *table, int write,
2917                    void __user *buffer, size_t *lenp, loff_t *ppos)
2918{
2919        return -ENOSYS;
2920}
2921
2922int proc_dointvec_jiffies(struct ctl_table *table, int write,
2923                    void __user *buffer, size_t *lenp, loff_t *ppos)
2924{
2925        return -ENOSYS;
2926}
2927
2928int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2929                    void __user *buffer, size_t *lenp, loff_t *ppos)
2930{
2931        return -ENOSYS;
2932}
2933
2934int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2935                             void __user *buffer, size_t *lenp, loff_t *ppos)
2936{
2937        return -ENOSYS;
2938}
2939
2940int proc_doulongvec_minmax(struct ctl_table *table, int write,
2941                    void __user *buffer, size_t *lenp, loff_t *ppos)
2942{
2943        return -ENOSYS;
2944}
2945
2946int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2947                                      void __user *buffer,
2948                                      size_t *lenp, loff_t *ppos)
2949{
2950    return -ENOSYS;
2951}
2952
2953
2954#endif /* CONFIG_PROC_FS */
2955
2956
2957#ifdef CONFIG_SYSCTL_SYSCALL
2958/*
2959 * General sysctl support routines 
2960 */
2961
2962/* The generic sysctl data routine (used if no strategy routine supplied) */
2963int sysctl_data(struct ctl_table *table,
2964                void __user *oldval, size_t __user *oldlenp,
2965                void __user *newval, size_t newlen)
2966{
2967        size_t len;
2968
2969        /* Get out of I don't have a variable */
2970        if (!table->data || !table->maxlen)
2971                return -ENOTDIR;
2972
2973        if (oldval && oldlenp) {
2974                if (get_user(len, oldlenp))
2975                        return -EFAULT;
2976                if (len) {
2977                        if (len > table->maxlen)
2978                                len = table->maxlen;
2979                        if (copy_to_user(oldval, table->data, len))
2980                                return -EFAULT;
2981                        if (put_user(len, oldlenp))
2982                                return -EFAULT;
2983                }
2984        }
2985
2986        if (newval && newlen) {
2987                if (newlen > table->maxlen)
2988                        newlen = table->maxlen;
2989
2990                if (copy_from_user(table->data, newval, newlen))
2991                        return -EFAULT;
2992        }
2993        return 1;
2994}
2995
2996/* The generic string strategy routine: */
2997int sysctl_string(struct ctl_table *table,
2998                  void __user *oldval, size_t __user *oldlenp,
2999                  void __user *newval, size_t newlen)
3000{
3001        if (!table->data || !table->maxlen) 
3002                return -ENOTDIR;
3003        
3004        if (oldval && oldlenp) {
3005                size_t bufsize;
3006                if (get_user(bufsize, oldlenp))
3007                        return -EFAULT;
3008                if (bufsize) {
3009                        size_t len = strlen(table->data), copied;
3010
3011                        /* This shouldn't trigger for a well-formed sysctl */
3012                        if (len > table->maxlen)
3013                                len = table->maxlen;
3014
3015                        /* Copy up to a max of bufsize-1 bytes of the string */
3016                        copied = (len >= bufsize) ? bufsize - 1 : len;
3017
3018                        if (copy_to_user(oldval, table->data, copied) ||
3019                            put_user(0, (char __user *)(oldval + copied)))
3020                                return -EFAULT;
3021                        if (put_user(len, oldlenp))
3022                                return -EFAULT;
3023                }
3024        }
3025        if (newval && newlen) {
3026                size_t len = newlen;
3027                if (len > table->maxlen)
3028                        len = table->maxlen;
3029                if(copy_from_user(table->data, newval, len))
3030                        return -EFAULT;
3031                if (len == table->maxlen)
3032                        len--;
3033                ((char *) table->data)[len] = 0;
3034        }
3035        return 1;
3036}
3037
3038/*
3039 * This function makes sure that all of the integers in the vector
3040 * are between the minimum and maximum values given in the arrays
3041 * table->extra1 and table->extra2, respectively.
3042 */
3043int sysctl_intvec(struct ctl_table *table,
3044                void __user *oldval, size_t __user *oldlenp,
3045                void __user *newval, size_t newlen)
3046{
3047
3048        if (newval && newlen) {
3049                int __user *vec = (int __user *) newval;
3050                int *min = (int *) table->extra1;
3051                int *max = (int *) table->extra2;
3052                size_t length;
3053                int i;
3054
3055                if (newlen % sizeof(int) != 0)
3056                        return -EINVAL;
3057
3058                if (!table->extra1 && !table->extra2)
3059                        return 0;
3060
3061                if (newlen > table->maxlen)
3062                        newlen = table->maxlen;
3063                length = newlen / sizeof(int);
3064
3065                for (i = 0; i < length; i++) {
3066                        int value;
3067                        if (get_user(value, vec + i))
3068                                return -EFAULT;
3069                        if (min && value < min[i])
3070                                return -EINVAL;
3071                        if (max && value > max[i])
3072                                return -EINVAL;
3073                }
3074        }
3075        return 0;
3076}
3077
3078/* Strategy function to convert jiffies to seconds */ 
3079int sysctl_jiffies(struct ctl_table *table,
3080                void __user *oldval, size_t __user *oldlenp,
3081                void __user *newval, size_t newlen)
3082{
3083        if (oldval && oldlenp) {
3084                size_t olen;
3085
3086                if (get_user(olen, oldlenp))
3087                        return -EFAULT;
3088                if (olen) {
3089                        int val;
3090
3091                        if (olen < sizeof(int))
3092                                return -EINVAL;
3093
3094                        val = *(int *)(table->data) / HZ;
3095                        if (put_user(val, (int __user *)oldval))
3096                                return -EFAULT;
3097                        if (put_user(sizeof(int), oldlenp))
3098                                return -EFAULT;
3099                }
3100        }
3101        if (newval && newlen) { 
3102                int new;
3103                if (newlen != sizeof(int))
3104                        return -EINVAL; 
3105                if (get_user(new, (int __user *)newval))
3106                        return -EFAULT;
3107                *(int *)(table->data) = new*HZ; 
3108        }
3109        return 1;
3110}
3111
3112/* Strategy function to convert jiffies to seconds */ 
3113int sysctl_ms_jiffies(struct ctl_table *table,
3114                void __user *oldval, size_t __user *oldlenp,
3115                void __user *newval, size_t newlen)
3116{
3117        if (oldval && oldlenp) {
3118                size_t olen;
3119
3120                if (get_user(olen, oldlenp))
3121                        return -EFAULT;
3122                if (olen) {
3123                        int val;
3124
3125                        if (olen < sizeof(int))
3126                                return -EINVAL;
3127
3128                        val = jiffies_to_msecs(*(int *)(table->data));
3129                        if (put_user(val, (int __user *)oldval))
3130                                return -EFAULT;
3131                        if (put_user(sizeof(int), oldlenp))
3132                                return -EFAULT;
3133                }
3134        }
3135        if (newval && newlen) { 
3136                int new;
3137                if (newlen != sizeof(int))
3138                        return -EINVAL; 
3139                if (get_user(new, (int __user *)newval))
3140                        return -EFAULT;
3141                *(int *)(table->data) = msecs_to_jiffies(new);
3142        }
3143        return 1;
3144}
3145
3146
3147
3148#else /* CONFIG_SYSCTL_SYSCALL */
3149
3150
3151SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
3152{
3153        struct __sysctl_args tmp;
3154        int error;
3155
3156        if (copy_from_user(&tmp, args, sizeof(tmp)))
3157                return -EFAULT;
3158
3159        error = deprecated_sysctl_warning(&tmp);
3160
3161        /* If no error reading the parameters then just -ENOSYS ... */
3162        if (!error)
3163                error = -ENOSYS;
3164
3165        return error;
3166}
3167
3168int sysctl_data(struct ctl_table *table,
3169                  void __user *oldval, size_t __user *oldlenp,
3170                  void __user *newval, size_t newlen)
3171{
3172        return -ENOSYS;
3173}
3174
3175int sysctl_string(struct ctl_table *table,
3176                  void __user *oldval, size_t __user *oldlenp,
3177                  void __user *newval, size_t newlen)
3178{
3179        return -ENOSYS;
3180}
3181
3182int sysctl_intvec(struct ctl_table *table,
3183                void __user *oldval, size_t __user *oldlenp,
3184                void __user *newval, size_t newlen)
3185{
3186        return -ENOSYS;
3187}
3188
3189int sysctl_jiffies(struct ctl_table *table,
3190                void __user *oldval, size_t __user *oldlenp,
3191                void __user *newval, size_t newlen)
3192{
3193        return -ENOSYS;
3194}
3195
3196int sysctl_ms_jiffies(struct ctl_table *table,
3197                void __user *oldval, size_t __user *oldlenp,
3198                void __user *newval, size_t newlen)
3199{
3200        return -ENOSYS;
3201}
3202
3203#endif /* CONFIG_SYSCTL_SYSCALL */
3204
3205static int deprecated_sysctl_warning(struct __sysctl_args *args)
3206{
3207        static int msg_count;
3208        int name[CTL_MAXNAME];
3209        int i;
3210
3211        /* Check args->nlen. */
3212        if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
3213                return -ENOTDIR;
3214
3215        /* Read in the sysctl name for better debug message logging */
3216        for (i = 0; i < args->nlen; i++)
3217                if (get_user(name[i], args->name + i))
3218                        return -EFAULT;
3219
3220        /* Ignore accesses to kernel.version */
3221        if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
3222                return 0;
3223
3224        if (msg_count < 5) {
3225                msg_count++;
3226                printk(KERN_INFO
3227                        "warning: process `%s' used the deprecated sysctl "
3228                        "system call with ", current->comm);
3229                for (i = 0; i < args->nlen; i++)
3230                        printk("%d.", name[i]);
3231                printk("\n");
3232        }
3233        return 0;
3234}
3235
/*
 * The exports are collected here in one place rather than following each
 * symbol definition, because most of these symbols are defined twice (a
 * real implementation and a stub, selected by the configuration).
 */

/* /proc handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl strategy routines */
EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(unregister_sysctl_table);
3256