linux/net/ipv4/sysctl_net_ipv4.c
<<
>>
Prefs
   1/*
   2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
   3 *
   4 * Begun April 1, 1996, Mike Shaver.
   5 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
   6 */
   7
   8#include <linux/mm.h>
   9#include <linux/module.h>
  10#include <linux/sysctl.h>
  11#include <linux/igmp.h>
  12#include <linux/inetdevice.h>
  13#include <linux/seqlock.h>
  14#include <linux/init.h>
  15#include <linux/slab.h>
  16#include <linux/nsproxy.h>
  17#include <linux/swap.h>
  18#include <net/snmp.h>
  19#include <net/icmp.h>
  20#include <net/ip.h>
  21#include <net/route.h>
  22#include <net/tcp.h>
  23#include <net/udp.h>
  24#include <net/cipso_ipv4.h>
  25#include <net/inet_frag.h>
  26#include <net/ping.h>
  27#include <net/protocol.h>
  28
  29static int zero;
  30static int one = 1;
  31static int four = 4;
  32static int thousand = 1000;
  33static int gso_max_segs = GSO_MAX_SEGS;
  34static int tcp_retr1_max = 255;
  35static int ip_local_port_range_min[] = { 1, 1 };
  36static int ip_local_port_range_max[] = { 65535, 65535 };
  37static int tcp_adv_win_scale_min = -31;
  38static int tcp_adv_win_scale_max = 31;
  39static int ip_privileged_port_min;
  40static int ip_privileged_port_max = 65535;
  41static int ip_ttl_min = 1;
  42static int ip_ttl_max = 255;
  43static int tcp_syn_retries_min = 1;
  44static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
  45static int ip_ping_group_range_min[] = { 0, 0 };
  46static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
  47
  48/* Update system visible IP port range */
  49static void set_local_port_range(struct net *net, int range[2])
  50{
  51        bool same_parity = !((range[0] ^ range[1]) & 1);
  52
  53        write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
  54        if (same_parity && !net->ipv4.ip_local_ports.warned) {
  55                net->ipv4.ip_local_ports.warned = true;
  56                pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
  57        }
  58        net->ipv4.ip_local_ports.range[0] = range[0];
  59        net->ipv4.ip_local_ports.range[1] = range[1];
  60        write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
  61}
  62
  63/* Validate changes from /proc interface. */
  64static int ipv4_local_port_range(struct ctl_table *table, int write,
  65                                 void __user *buffer,
  66                                 size_t *lenp, loff_t *ppos)
  67{
  68        struct net *net =
  69                container_of(table->data, struct net, ipv4.ip_local_ports.range);
  70        int ret;
  71        int range[2];
  72        struct ctl_table tmp = {
  73                .data = &range,
  74                .maxlen = sizeof(range),
  75                .mode = table->mode,
  76                .extra1 = &ip_local_port_range_min,
  77                .extra2 = &ip_local_port_range_max,
  78        };
  79
  80        inet_get_local_port_range(net, &range[0], &range[1]);
  81
  82        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
  83
  84        if (write && ret == 0) {
  85                /* Ensure that the upper limit is not smaller than the lower,
  86                 * and that the lower does not encroach upon the privileged
  87                 * port limit.
  88                 */
  89                if ((range[1] < range[0]) ||
  90                    (range[0] < net->ipv4.sysctl_ip_prot_sock))
  91                        ret = -EINVAL;
  92                else
  93                        set_local_port_range(net, range);
  94        }
  95
  96        return ret;
  97}
  98
  99/* Validate changes from /proc interface. */
 100static int ipv4_privileged_ports(struct ctl_table *table, int write,
 101                                void __user *buffer, size_t *lenp, loff_t *ppos)
 102{
 103        struct net *net = container_of(table->data, struct net,
 104            ipv4.sysctl_ip_prot_sock);
 105        int ret;
 106        int pports;
 107        int range[2];
 108        struct ctl_table tmp = {
 109                .data = &pports,
 110                .maxlen = sizeof(pports),
 111                .mode = table->mode,
 112                .extra1 = &ip_privileged_port_min,
 113                .extra2 = &ip_privileged_port_max,
 114        };
 115
 116        pports = net->ipv4.sysctl_ip_prot_sock;
 117
 118        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 119
 120        if (write && ret == 0) {
 121                inet_get_local_port_range(net, &range[0], &range[1]);
 122                /* Ensure that the local port range doesn't overlap with the
 123                 * privileged port range.
 124                 */
 125                if (range[0] < pports)
 126                        ret = -EINVAL;
 127                else
 128                        net->ipv4.sysctl_ip_prot_sock = pports;
 129        }
 130
 131        return ret;
 132}
 133
 134static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
 135{
 136        kgid_t *data = table->data;
 137        struct net *net =
 138                container_of(table->data, struct net, ipv4.ping_group_range.range);
 139        unsigned int seq;
 140        do {
 141                seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
 142
 143                *low = data[0];
 144                *high = data[1];
 145        } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
 146}
 147
 148/* Update system visible IP port range */
 149static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
 150{
 151        kgid_t *data = table->data;
 152        struct net *net =
 153                container_of(table->data, struct net, ipv4.ping_group_range.range);
 154        write_seqlock(&net->ipv4.ping_group_range.lock);
 155        data[0] = low;
 156        data[1] = high;
 157        write_sequnlock(&net->ipv4.ping_group_range.lock);
 158}
 159
 160/* Validate changes from /proc interface. */
 161static int ipv4_ping_group_range(struct ctl_table *table, int write,
 162                                 void __user *buffer,
 163                                 size_t *lenp, loff_t *ppos)
 164{
 165        struct user_namespace *user_ns = current_user_ns();
 166        int ret;
 167        gid_t urange[2];
 168        kgid_t low, high;
 169        struct ctl_table tmp = {
 170                .data = &urange,
 171                .maxlen = sizeof(urange),
 172                .mode = table->mode,
 173                .extra1 = &ip_ping_group_range_min,
 174                .extra2 = &ip_ping_group_range_max,
 175        };
 176
 177        inet_get_ping_group_range_table(table, &low, &high);
 178        urange[0] = from_kgid_munged(user_ns, low);
 179        urange[1] = from_kgid_munged(user_ns, high);
 180        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 181
 182        if (write && ret == 0) {
 183                low = make_kgid(user_ns, urange[0]);
 184                high = make_kgid(user_ns, urange[1]);
 185                if (!gid_valid(low) || !gid_valid(high) ||
 186                    (urange[1] < urange[0]) || gid_lt(high, low)) {
 187                        low = make_kgid(&init_user_ns, 1);
 188                        high = make_kgid(&init_user_ns, 0);
 189                }
 190                set_ping_group_range(table, low, high);
 191        }
 192
 193        return ret;
 194}
 195
 196static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 197                                       void __user *buffer, size_t *lenp, loff_t *ppos)
 198{
 199        char val[TCP_CA_NAME_MAX];
 200        struct ctl_table tbl = {
 201                .data = val,
 202                .maxlen = TCP_CA_NAME_MAX,
 203        };
 204        int ret;
 205
 206        tcp_get_default_congestion_control(val);
 207
 208        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 209        if (write && ret == 0)
 210                ret = tcp_set_default_congestion_control(val);
 211        return ret;
 212}
 213
 214static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
 215                                                 int write,
 216                                                 void __user *buffer, size_t *lenp,
 217                                                 loff_t *ppos)
 218{
 219        struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
 220        int ret;
 221
 222        tbl.data = kmalloc(tbl.maxlen, GFP_USER);
 223        if (!tbl.data)
 224                return -ENOMEM;
 225        tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
 226        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 227        kfree(tbl.data);
 228        return ret;
 229}
 230
 231static int proc_allowed_congestion_control(struct ctl_table *ctl,
 232                                           int write,
 233                                           void __user *buffer, size_t *lenp,
 234                                           loff_t *ppos)
 235{
 236        struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
 237        int ret;
 238
 239        tbl.data = kmalloc(tbl.maxlen, GFP_USER);
 240        if (!tbl.data)
 241                return -ENOMEM;
 242
 243        tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
 244        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 245        if (write && ret == 0)
 246                ret = tcp_set_allowed_congestion_control(tbl.data);
 247        kfree(tbl.data);
 248        return ret;
 249}
 250
 251static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 252                                 void __user *buffer, size_t *lenp,
 253                                 loff_t *ppos)
 254{
 255        struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
 256        struct tcp_fastopen_context *ctxt;
 257        int ret;
 258        u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
 259
 260        tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
 261        if (!tbl.data)
 262                return -ENOMEM;
 263
 264        rcu_read_lock();
 265        ctxt = rcu_dereference(tcp_fastopen_ctx);
 266        if (ctxt)
 267                memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
 268        else
 269                memset(user_key, 0, sizeof(user_key));
 270        rcu_read_unlock();
 271
 272        snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
 273                user_key[0], user_key[1], user_key[2], user_key[3]);
 274        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 275
 276        if (write && ret == 0) {
 277                if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
 278                           user_key + 2, user_key + 3) != 4) {
 279                        ret = -EINVAL;
 280                        goto bad_key;
 281                }
 282                /* Generate a dummy secret but don't publish it. This
 283                 * is needed so we don't regenerate a new key on the
 284                 * first invocation of tcp_fastopen_cookie_gen
 285                 */
 286                tcp_fastopen_init_key_once(false);
 287                tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
 288        }
 289
 290bad_key:
 291        pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
 292               user_key[0], user_key[1], user_key[2], user_key[3],
 293               (char *)tbl.data, ret);
 294        kfree(tbl.data);
 295        return ret;
 296}
 297
 298static void proc_configure_early_demux(int enabled, int protocol)
 299{
 300        struct net_protocol *ipprot;
 301#if IS_ENABLED(CONFIG_IPV6)
 302        struct inet6_protocol *ip6prot;
 303#endif
 304
 305        rcu_read_lock();
 306
 307        ipprot = rcu_dereference(inet_protos[protocol]);
 308        if (ipprot)
 309                ipprot->early_demux = enabled ? ipprot->early_demux_handler :
 310                                                NULL;
 311
 312#if IS_ENABLED(CONFIG_IPV6)
 313        ip6prot = rcu_dereference(inet6_protos[protocol]);
 314        if (ip6prot)
 315                ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
 316                                                 NULL;
 317#endif
 318        rcu_read_unlock();
 319}
 320
 321static int proc_tcp_early_demux(struct ctl_table *table, int write,
 322                                void __user *buffer, size_t *lenp, loff_t *ppos)
 323{
 324        int ret = 0;
 325
 326        ret = proc_dointvec(table, write, buffer, lenp, ppos);
 327
 328        if (write && !ret) {
 329                int enabled = init_net.ipv4.sysctl_tcp_early_demux;
 330
 331                proc_configure_early_demux(enabled, IPPROTO_TCP);
 332        }
 333
 334        return ret;
 335}
 336
 337static int proc_udp_early_demux(struct ctl_table *table, int write,
 338                                void __user *buffer, size_t *lenp, loff_t *ppos)
 339{
 340        int ret = 0;
 341
 342        ret = proc_dointvec(table, write, buffer, lenp, ppos);
 343
 344        if (write && !ret) {
 345                int enabled = init_net.ipv4.sysctl_udp_early_demux;
 346
 347                proc_configure_early_demux(enabled, IPPROTO_UDP);
 348        }
 349
 350        return ret;
 351}
 352
 353static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
 354                                             int write,
 355                                             void __user *buffer,
 356                                             size_t *lenp, loff_t *ppos)
 357{
 358        int ret;
 359
 360        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 361        if (write && ret == 0)
 362                tcp_fastopen_active_timeout_reset();
 363        return ret;
 364}
 365
 366static struct ctl_table ipv4_table[] = {
 367        {
 368                .procname       = "tcp_timestamps",
 369                .data           = &sysctl_tcp_timestamps,
 370                .maxlen         = sizeof(int),
 371                .mode           = 0644,
 372                .proc_handler   = proc_dointvec
 373        },
 374        {
 375                .procname       = "tcp_window_scaling",
 376                .data           = &sysctl_tcp_window_scaling,
 377                .maxlen         = sizeof(int),
 378                .mode           = 0644,
 379                .proc_handler   = proc_dointvec
 380        },
 381        {
 382                .procname       = "tcp_sack",
 383                .data           = &sysctl_tcp_sack,
 384                .maxlen         = sizeof(int),
 385                .mode           = 0644,
 386                .proc_handler   = proc_dointvec
 387        },
 388        {
 389                .procname       = "tcp_retrans_collapse",
 390                .data           = &sysctl_tcp_retrans_collapse,
 391                .maxlen         = sizeof(int),
 392                .mode           = 0644,
 393                .proc_handler   = proc_dointvec
 394        },
 395        {
 396                .procname       = "tcp_max_orphans",
 397                .data           = &sysctl_tcp_max_orphans,
 398                .maxlen         = sizeof(int),
 399                .mode           = 0644,
 400                .proc_handler   = proc_dointvec
 401        },
 402        {
 403                .procname       = "tcp_fastopen",
 404                .data           = &sysctl_tcp_fastopen,
 405                .maxlen         = sizeof(int),
 406                .mode           = 0644,
 407                .proc_handler   = proc_dointvec,
 408        },
 409        {
 410                .procname       = "tcp_fastopen_key",
 411                .mode           = 0600,
 412                .maxlen         = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
 413                .proc_handler   = proc_tcp_fastopen_key,
 414        },
 415        {
 416                .procname       = "tcp_fastopen_blackhole_timeout_sec",
 417                .data           = &sysctl_tcp_fastopen_blackhole_timeout,
 418                .maxlen         = sizeof(int),
 419                .mode           = 0644,
 420                .proc_handler   = proc_tfo_blackhole_detect_timeout,
 421                .extra1         = &zero,
 422        },
 423        {
 424                .procname       = "tcp_abort_on_overflow",
 425                .data           = &sysctl_tcp_abort_on_overflow,
 426                .maxlen         = sizeof(int),
 427                .mode           = 0644,
 428                .proc_handler   = proc_dointvec
 429        },
 430        {
 431                .procname       = "tcp_stdurg",
 432                .data           = &sysctl_tcp_stdurg,
 433                .maxlen         = sizeof(int),
 434                .mode           = 0644,
 435                .proc_handler   = proc_dointvec
 436        },
 437        {
 438                .procname       = "tcp_rfc1337",
 439                .data           = &sysctl_tcp_rfc1337,
 440                .maxlen         = sizeof(int),
 441                .mode           = 0644,
 442                .proc_handler   = proc_dointvec
 443        },
 444        {
 445                .procname       = "inet_peer_threshold",
 446                .data           = &inet_peer_threshold,
 447                .maxlen         = sizeof(int),
 448                .mode           = 0644,
 449                .proc_handler   = proc_dointvec
 450        },
 451        {
 452                .procname       = "inet_peer_minttl",
 453                .data           = &inet_peer_minttl,
 454                .maxlen         = sizeof(int),
 455                .mode           = 0644,
 456                .proc_handler   = proc_dointvec_jiffies,
 457        },
 458        {
 459                .procname       = "inet_peer_maxttl",
 460                .data           = &inet_peer_maxttl,
 461                .maxlen         = sizeof(int),
 462                .mode           = 0644,
 463                .proc_handler   = proc_dointvec_jiffies,
 464        },
 465        {
 466                .procname       = "tcp_fack",
 467                .data           = &sysctl_tcp_fack,
 468                .maxlen         = sizeof(int),
 469                .mode           = 0644,
 470                .proc_handler   = proc_dointvec
 471        },
 472        {
 473                .procname       = "tcp_recovery",
 474                .data           = &sysctl_tcp_recovery,
 475                .maxlen         = sizeof(int),
 476                .mode           = 0644,
 477                .proc_handler   = proc_dointvec,
 478        },
 479        {
 480                .procname       = "tcp_max_reordering",
 481                .data           = &sysctl_tcp_max_reordering,
 482                .maxlen         = sizeof(int),
 483                .mode           = 0644,
 484                .proc_handler   = proc_dointvec
 485        },
 486        {
 487                .procname       = "tcp_dsack",
 488                .data           = &sysctl_tcp_dsack,
 489                .maxlen         = sizeof(int),
 490                .mode           = 0644,
 491                .proc_handler   = proc_dointvec
 492        },
 493        {
 494                .procname       = "tcp_mem",
 495                .maxlen         = sizeof(sysctl_tcp_mem),
 496                .data           = &sysctl_tcp_mem,
 497                .mode           = 0644,
 498                .proc_handler   = proc_doulongvec_minmax,
 499        },
 500        {
 501                .procname       = "tcp_wmem",
 502                .data           = &sysctl_tcp_wmem,
 503                .maxlen         = sizeof(sysctl_tcp_wmem),
 504                .mode           = 0644,
 505                .proc_handler   = proc_dointvec_minmax,
 506                .extra1         = &one,
 507        },
 508        {
 509                .procname       = "tcp_rmem",
 510                .data           = &sysctl_tcp_rmem,
 511                .maxlen         = sizeof(sysctl_tcp_rmem),
 512                .mode           = 0644,
 513                .proc_handler   = proc_dointvec_minmax,
 514                .extra1         = &one,
 515        },
 516        {
 517                .procname       = "tcp_app_win",
 518                .data           = &sysctl_tcp_app_win,
 519                .maxlen         = sizeof(int),
 520                .mode           = 0644,
 521                .proc_handler   = proc_dointvec
 522        },
 523        {
 524                .procname       = "tcp_adv_win_scale",
 525                .data           = &sysctl_tcp_adv_win_scale,
 526                .maxlen         = sizeof(int),
 527                .mode           = 0644,
 528                .proc_handler   = proc_dointvec_minmax,
 529                .extra1         = &tcp_adv_win_scale_min,
 530                .extra2         = &tcp_adv_win_scale_max,
 531        },
 532        {
 533                .procname       = "tcp_frto",
 534                .data           = &sysctl_tcp_frto,
 535                .maxlen         = sizeof(int),
 536                .mode           = 0644,
 537                .proc_handler   = proc_dointvec
 538        },
 539        {
 540                .procname       = "tcp_min_rtt_wlen",
 541                .data           = &sysctl_tcp_min_rtt_wlen,
 542                .maxlen         = sizeof(int),
 543                .mode           = 0644,
 544                .proc_handler   = proc_dointvec
 545        },
 546        {
 547                .procname       = "tcp_low_latency",
 548                .data           = &sysctl_tcp_low_latency,
 549                .maxlen         = sizeof(int),
 550                .mode           = 0644,
 551                .proc_handler   = proc_dointvec
 552        },
 553        {
 554                .procname       = "tcp_no_metrics_save",
 555                .data           = &sysctl_tcp_nometrics_save,
 556                .maxlen         = sizeof(int),
 557                .mode           = 0644,
 558                .proc_handler   = proc_dointvec,
 559        },
 560        {
 561                .procname       = "tcp_moderate_rcvbuf",
 562                .data           = &sysctl_tcp_moderate_rcvbuf,
 563                .maxlen         = sizeof(int),
 564                .mode           = 0644,
 565                .proc_handler   = proc_dointvec,
 566        },
 567        {
 568                .procname       = "tcp_tso_win_divisor",
 569                .data           = &sysctl_tcp_tso_win_divisor,
 570                .maxlen         = sizeof(int),
 571                .mode           = 0644,
 572                .proc_handler   = proc_dointvec,
 573        },
 574        {
 575                .procname       = "tcp_congestion_control",
 576                .mode           = 0644,
 577                .maxlen         = TCP_CA_NAME_MAX,
 578                .proc_handler   = proc_tcp_congestion_control,
 579        },
 580        {
 581                .procname       = "tcp_workaround_signed_windows",
 582                .data           = &sysctl_tcp_workaround_signed_windows,
 583                .maxlen         = sizeof(int),
 584                .mode           = 0644,
 585                .proc_handler   = proc_dointvec
 586        },
 587        {
 588                .procname       = "tcp_limit_output_bytes",
 589                .data           = &sysctl_tcp_limit_output_bytes,
 590                .maxlen         = sizeof(int),
 591                .mode           = 0644,
 592                .proc_handler   = proc_dointvec
 593        },
 594        {
 595                .procname       = "tcp_challenge_ack_limit",
 596                .data           = &sysctl_tcp_challenge_ack_limit,
 597                .maxlen         = sizeof(int),
 598                .mode           = 0644,
 599                .proc_handler   = proc_dointvec
 600        },
 601        {
 602                .procname       = "tcp_slow_start_after_idle",
 603                .data           = &sysctl_tcp_slow_start_after_idle,
 604                .maxlen         = sizeof(int),
 605                .mode           = 0644,
 606                .proc_handler   = proc_dointvec
 607        },
 608#ifdef CONFIG_NETLABEL
 609        {
 610                .procname       = "cipso_cache_enable",
 611                .data           = &cipso_v4_cache_enabled,
 612                .maxlen         = sizeof(int),
 613                .mode           = 0644,
 614                .proc_handler   = proc_dointvec,
 615        },
 616        {
 617                .procname       = "cipso_cache_bucket_size",
 618                .data           = &cipso_v4_cache_bucketsize,
 619                .maxlen         = sizeof(int),
 620                .mode           = 0644,
 621                .proc_handler   = proc_dointvec,
 622        },
 623        {
 624                .procname       = "cipso_rbm_optfmt",
 625                .data           = &cipso_v4_rbm_optfmt,
 626                .maxlen         = sizeof(int),
 627                .mode           = 0644,
 628                .proc_handler   = proc_dointvec,
 629        },
 630        {
 631                .procname       = "cipso_rbm_strictvalid",
 632                .data           = &cipso_v4_rbm_strictvalid,
 633                .maxlen         = sizeof(int),
 634                .mode           = 0644,
 635                .proc_handler   = proc_dointvec,
 636        },
 637#endif /* CONFIG_NETLABEL */
 638        {
 639                .procname       = "tcp_available_congestion_control",
 640                .maxlen         = TCP_CA_BUF_MAX,
 641                .mode           = 0444,
 642                .proc_handler   = proc_tcp_available_congestion_control,
 643        },
 644        {
 645                .procname       = "tcp_allowed_congestion_control",
 646                .maxlen         = TCP_CA_BUF_MAX,
 647                .mode           = 0644,
 648                .proc_handler   = proc_allowed_congestion_control,
 649        },
 650        {
 651                .procname       = "tcp_thin_linear_timeouts",
 652                .data           = &sysctl_tcp_thin_linear_timeouts,
 653                .maxlen         = sizeof(int),
 654                .mode           = 0644,
 655                .proc_handler   = proc_dointvec
 656        },
 657        {
 658                .procname       = "tcp_early_retrans",
 659                .data           = &sysctl_tcp_early_retrans,
 660                .maxlen         = sizeof(int),
 661                .mode           = 0644,
 662                .proc_handler   = proc_dointvec_minmax,
 663                .extra1         = &zero,
 664                .extra2         = &four,
 665        },
 666        {
 667                .procname       = "tcp_min_tso_segs",
 668                .data           = &sysctl_tcp_min_tso_segs,
 669                .maxlen         = sizeof(int),
 670                .mode           = 0644,
 671                .proc_handler   = proc_dointvec_minmax,
 672                .extra1         = &one,
 673                .extra2         = &gso_max_segs,
 674        },
 675        {
 676                .procname       = "tcp_pacing_ss_ratio",
 677                .data           = &sysctl_tcp_pacing_ss_ratio,
 678                .maxlen         = sizeof(int),
 679                .mode           = 0644,
 680                .proc_handler   = proc_dointvec_minmax,
 681                .extra1         = &zero,
 682                .extra2         = &thousand,
 683        },
 684        {
 685                .procname       = "tcp_pacing_ca_ratio",
 686                .data           = &sysctl_tcp_pacing_ca_ratio,
 687                .maxlen         = sizeof(int),
 688                .mode           = 0644,
 689                .proc_handler   = proc_dointvec_minmax,
 690                .extra1         = &zero,
 691                .extra2         = &thousand,
 692        },
 693        {
 694                .procname       = "tcp_autocorking",
 695                .data           = &sysctl_tcp_autocorking,
 696                .maxlen         = sizeof(int),
 697                .mode           = 0644,
 698                .proc_handler   = proc_dointvec_minmax,
 699                .extra1         = &zero,
 700                .extra2         = &one,
 701        },
 702        {
 703                .procname       = "tcp_invalid_ratelimit",
 704                .data           = &sysctl_tcp_invalid_ratelimit,
 705                .maxlen         = sizeof(int),
 706                .mode           = 0644,
 707                .proc_handler   = proc_dointvec_ms_jiffies,
 708        },
 709        {
 710                .procname       = "icmp_msgs_per_sec",
 711                .data           = &sysctl_icmp_msgs_per_sec,
 712                .maxlen         = sizeof(int),
 713                .mode           = 0644,
 714                .proc_handler   = proc_dointvec_minmax,
 715                .extra1         = &zero,
 716        },
 717        {
 718                .procname       = "icmp_msgs_burst",
 719                .data           = &sysctl_icmp_msgs_burst,
 720                .maxlen         = sizeof(int),
 721                .mode           = 0644,
 722                .proc_handler   = proc_dointvec_minmax,
 723                .extra1         = &zero,
 724        },
 725        {
 726                .procname       = "udp_mem",
 727                .data           = &sysctl_udp_mem,
 728                .maxlen         = sizeof(sysctl_udp_mem),
 729                .mode           = 0644,
 730                .proc_handler   = proc_doulongvec_minmax,
 731        },
 732        {
 733                .procname       = "udp_rmem_min",
 734                .data           = &sysctl_udp_rmem_min,
 735                .maxlen         = sizeof(sysctl_udp_rmem_min),
 736                .mode           = 0644,
 737                .proc_handler   = proc_dointvec_minmax,
 738                .extra1         = &one
 739        },
 740        {
 741                .procname       = "udp_wmem_min",
 742                .data           = &sysctl_udp_wmem_min,
 743                .maxlen         = sizeof(sysctl_udp_wmem_min),
 744                .mode           = 0644,
 745                .proc_handler   = proc_dointvec_minmax,
 746                .extra1         = &one
 747        },
 748        { }
 749};
 750
 751static struct ctl_table ipv4_net_table[] = {
 752        {
 753                .procname       = "icmp_echo_ignore_all",
 754                .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
 755                .maxlen         = sizeof(int),
 756                .mode           = 0644,
 757                .proc_handler   = proc_dointvec
 758        },
 759        {
 760                .procname       = "icmp_echo_ignore_broadcasts",
 761                .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
 762                .maxlen         = sizeof(int),
 763                .mode           = 0644,
 764                .proc_handler   = proc_dointvec
 765        },
 766        {
 767                .procname       = "icmp_ignore_bogus_error_responses",
 768                .data           = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
 769                .maxlen         = sizeof(int),
 770                .mode           = 0644,
 771                .proc_handler   = proc_dointvec
 772        },
 773        {
 774                .procname       = "icmp_errors_use_inbound_ifaddr",
 775                .data           = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
 776                .maxlen         = sizeof(int),
 777                .mode           = 0644,
 778                .proc_handler   = proc_dointvec
 779        },
 780        {
 781                .procname       = "icmp_ratelimit",
 782                .data           = &init_net.ipv4.sysctl_icmp_ratelimit,
 783                .maxlen         = sizeof(int),
 784                .mode           = 0644,
 785                .proc_handler   = proc_dointvec_ms_jiffies,
 786        },
 787        {
 788                .procname       = "icmp_ratemask",
 789                .data           = &init_net.ipv4.sysctl_icmp_ratemask,
 790                .maxlen         = sizeof(int),
 791                .mode           = 0644,
 792                .proc_handler   = proc_dointvec
 793        },
 794        {
 795                .procname       = "ping_group_range",
 796                .data           = &init_net.ipv4.ping_group_range.range,
 797                .maxlen         = sizeof(gid_t)*2,
 798                .mode           = 0644,
 799                .proc_handler   = ipv4_ping_group_range,
 800        },
 801        {
 802                .procname       = "tcp_ecn",
 803                .data           = &init_net.ipv4.sysctl_tcp_ecn,
 804                .maxlen         = sizeof(int),
 805                .mode           = 0644,
 806                .proc_handler   = proc_dointvec
 807        },
 808        {
 809                .procname       = "tcp_ecn_fallback",
 810                .data           = &init_net.ipv4.sysctl_tcp_ecn_fallback,
 811                .maxlen         = sizeof(int),
 812                .mode           = 0644,
 813                .proc_handler   = proc_dointvec
 814        },
 815        {
 816                .procname       = "ip_dynaddr",
 817                .data           = &init_net.ipv4.sysctl_ip_dynaddr,
 818                .maxlen         = sizeof(int),
 819                .mode           = 0644,
 820                .proc_handler   = proc_dointvec
 821        },
 822        {
 823                .procname       = "ip_early_demux",
 824                .data           = &init_net.ipv4.sysctl_ip_early_demux,
 825                .maxlen         = sizeof(int),
 826                .mode           = 0644,
 827                .proc_handler   = proc_dointvec
 828        },
 829        {
 830                .procname       = "udp_early_demux",
 831                .data           = &init_net.ipv4.sysctl_udp_early_demux,
 832                .maxlen         = sizeof(int),
 833                .mode           = 0644,
 834                .proc_handler   = proc_udp_early_demux
 835        },
 836        {
 837                .procname       = "tcp_early_demux",
 838                .data           = &init_net.ipv4.sysctl_tcp_early_demux,
 839                .maxlen         = sizeof(int),
 840                .mode           = 0644,
 841                .proc_handler   = proc_tcp_early_demux
 842        },
 843        {
 844                .procname       = "ip_default_ttl",
 845                .data           = &init_net.ipv4.sysctl_ip_default_ttl,
 846                .maxlen         = sizeof(int),
 847                .mode           = 0644,
 848                .proc_handler   = proc_dointvec_minmax,
 849                .extra1         = &ip_ttl_min,
 850                .extra2         = &ip_ttl_max,
 851        },
 852        {
 853                .procname       = "ip_local_port_range",
 854                .maxlen         = sizeof(init_net.ipv4.ip_local_ports.range),
 855                .data           = &init_net.ipv4.ip_local_ports.range,
 856                .mode           = 0644,
 857                .proc_handler   = ipv4_local_port_range,
 858        },
 859        {
 860                .procname       = "ip_local_reserved_ports",
 861                .data           = &init_net.ipv4.sysctl_local_reserved_ports,
 862                .maxlen         = 65536,
 863                .mode           = 0644,
 864                .proc_handler   = proc_do_large_bitmap,
 865        },
 866        {
 867                .procname       = "ip_no_pmtu_disc",
 868                .data           = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
 869                .maxlen         = sizeof(int),
 870                .mode           = 0644,
 871                .proc_handler   = proc_dointvec
 872        },
 873        {
 874                .procname       = "ip_forward_use_pmtu",
 875                .data           = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
 876                .maxlen         = sizeof(int),
 877                .mode           = 0644,
 878                .proc_handler   = proc_dointvec,
 879        },
 880        {
 881                .procname       = "ip_nonlocal_bind",
 882                .data           = &init_net.ipv4.sysctl_ip_nonlocal_bind,
 883                .maxlen         = sizeof(int),
 884                .mode           = 0644,
 885                .proc_handler   = proc_dointvec
 886        },
 887        {
 888                .procname       = "fwmark_reflect",
 889                .data           = &init_net.ipv4.sysctl_fwmark_reflect,
 890                .maxlen         = sizeof(int),
 891                .mode           = 0644,
 892                .proc_handler   = proc_dointvec,
 893        },
 894        {
 895                .procname       = "tcp_fwmark_accept",
 896                .data           = &init_net.ipv4.sysctl_tcp_fwmark_accept,
 897                .maxlen         = sizeof(int),
 898                .mode           = 0644,
 899                .proc_handler   = proc_dointvec,
 900        },
 901#ifdef CONFIG_NET_L3_MASTER_DEV
 902        {
 903                .procname       = "tcp_l3mdev_accept",
 904                .data           = &init_net.ipv4.sysctl_tcp_l3mdev_accept,
 905                .maxlen         = sizeof(int),
 906                .mode           = 0644,
 907                .proc_handler   = proc_dointvec_minmax,
 908                .extra1         = &zero,
 909                .extra2         = &one,
 910        },
 911#endif
 912        {
 913                .procname       = "tcp_mtu_probing",
 914                .data           = &init_net.ipv4.sysctl_tcp_mtu_probing,
 915                .maxlen         = sizeof(int),
 916                .mode           = 0644,
 917                .proc_handler   = proc_dointvec,
 918        },
 919        {
 920                .procname       = "tcp_base_mss",
 921                .data           = &init_net.ipv4.sysctl_tcp_base_mss,
 922                .maxlen         = sizeof(int),
 923                .mode           = 0644,
 924                .proc_handler   = proc_dointvec,
 925        },
 926        {
 927                .procname       = "tcp_probe_threshold",
 928                .data           = &init_net.ipv4.sysctl_tcp_probe_threshold,
 929                .maxlen         = sizeof(int),
 930                .mode           = 0644,
 931                .proc_handler   = proc_dointvec,
 932        },
 933        {
 934                .procname       = "tcp_probe_interval",
 935                .data           = &init_net.ipv4.sysctl_tcp_probe_interval,
 936                .maxlen         = sizeof(int),
 937                .mode           = 0644,
 938                .proc_handler   = proc_dointvec,
 939        },
 940        {
 941                .procname       = "igmp_link_local_mcast_reports",
 942                .data           = &init_net.ipv4.sysctl_igmp_llm_reports,
 943                .maxlen         = sizeof(int),
 944                .mode           = 0644,
 945                .proc_handler   = proc_dointvec
 946        },
 947        {
 948                .procname       = "igmp_max_memberships",
 949                .data           = &init_net.ipv4.sysctl_igmp_max_memberships,
 950                .maxlen         = sizeof(int),
 951                .mode           = 0644,
 952                .proc_handler   = proc_dointvec
 953        },
 954        {
 955                .procname       = "igmp_max_msf",
 956                .data           = &init_net.ipv4.sysctl_igmp_max_msf,
 957                .maxlen         = sizeof(int),
 958                .mode           = 0644,
 959                .proc_handler   = proc_dointvec
 960        },
 961#ifdef CONFIG_IP_MULTICAST
 962        {
 963                .procname       = "igmp_qrv",
 964                .data           = &init_net.ipv4.sysctl_igmp_qrv,
 965                .maxlen         = sizeof(int),
 966                .mode           = 0644,
 967                .proc_handler   = proc_dointvec_minmax,
 968                .extra1         = &one
 969        },
 970#endif
 971        {
 972                .procname       = "tcp_keepalive_time",
 973                .data           = &init_net.ipv4.sysctl_tcp_keepalive_time,
 974                .maxlen         = sizeof(int),
 975                .mode           = 0644,
 976                .proc_handler   = proc_dointvec_jiffies,
 977        },
 978        {
 979                .procname       = "tcp_keepalive_probes",
 980                .data           = &init_net.ipv4.sysctl_tcp_keepalive_probes,
 981                .maxlen         = sizeof(int),
 982                .mode           = 0644,
 983                .proc_handler   = proc_dointvec
 984        },
 985        {
 986                .procname       = "tcp_keepalive_intvl",
 987                .data           = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
 988                .maxlen         = sizeof(int),
 989                .mode           = 0644,
 990                .proc_handler   = proc_dointvec_jiffies,
 991        },
 992        {
 993                .procname       = "tcp_syn_retries",
 994                .data           = &init_net.ipv4.sysctl_tcp_syn_retries,
 995                .maxlen         = sizeof(int),
 996                .mode           = 0644,
 997                .proc_handler   = proc_dointvec_minmax,
 998                .extra1         = &tcp_syn_retries_min,
 999                .extra2         = &tcp_syn_retries_max
1000        },
1001        {
1002                .procname       = "tcp_synack_retries",
1003                .data           = &init_net.ipv4.sysctl_tcp_synack_retries,
1004                .maxlen         = sizeof(int),
1005                .mode           = 0644,
1006                .proc_handler   = proc_dointvec
1007        },
1008#ifdef CONFIG_SYN_COOKIES
1009        {
1010                .procname       = "tcp_syncookies",
1011                .data           = &init_net.ipv4.sysctl_tcp_syncookies,
1012                .maxlen         = sizeof(int),
1013                .mode           = 0644,
1014                .proc_handler   = proc_dointvec
1015        },
1016#endif
1017        {
1018                .procname       = "tcp_reordering",
1019                .data           = &init_net.ipv4.sysctl_tcp_reordering,
1020                .maxlen         = sizeof(int),
1021                .mode           = 0644,
1022                .proc_handler   = proc_dointvec
1023        },
1024        {
1025                .procname       = "tcp_retries1",
1026                .data           = &init_net.ipv4.sysctl_tcp_retries1,
1027                .maxlen         = sizeof(int),
1028                .mode           = 0644,
1029                .proc_handler   = proc_dointvec_minmax,
1030                .extra2         = &tcp_retr1_max
1031        },
1032        {
1033                .procname       = "tcp_retries2",
1034                .data           = &init_net.ipv4.sysctl_tcp_retries2,
1035                .maxlen         = sizeof(int),
1036                .mode           = 0644,
1037                .proc_handler   = proc_dointvec
1038        },
1039        {
1040                .procname       = "tcp_orphan_retries",
1041                .data           = &init_net.ipv4.sysctl_tcp_orphan_retries,
1042                .maxlen         = sizeof(int),
1043                .mode           = 0644,
1044                .proc_handler   = proc_dointvec
1045        },
1046        {
1047                .procname       = "tcp_fin_timeout",
1048                .data           = &init_net.ipv4.sysctl_tcp_fin_timeout,
1049                .maxlen         = sizeof(int),
1050                .mode           = 0644,
1051                .proc_handler   = proc_dointvec_jiffies,
1052        },
1053        {
1054                .procname       = "tcp_notsent_lowat",
1055                .data           = &init_net.ipv4.sysctl_tcp_notsent_lowat,
1056                .maxlen         = sizeof(unsigned int),
1057                .mode           = 0644,
1058                .proc_handler   = proc_douintvec,
1059        },
1060        {
1061                .procname       = "tcp_tw_reuse",
1062                .data           = &init_net.ipv4.sysctl_tcp_tw_reuse,
1063                .maxlen         = sizeof(int),
1064                .mode           = 0644,
1065                .proc_handler   = proc_dointvec
1066        },
1067        {
1068                .procname       = "tcp_max_tw_buckets",
1069                .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
1070                .maxlen         = sizeof(int),
1071                .mode           = 0644,
1072                .proc_handler   = proc_dointvec
1073        },
1074        {
1075                .procname       = "tcp_max_syn_backlog",
1076                .data           = &init_net.ipv4.sysctl_max_syn_backlog,
1077                .maxlen         = sizeof(int),
1078                .mode           = 0644,
1079                .proc_handler   = proc_dointvec
1080        },
1081#ifdef CONFIG_IP_ROUTE_MULTIPATH
1082        {
1083                .procname       = "fib_multipath_use_neigh",
1084                .data           = &init_net.ipv4.sysctl_fib_multipath_use_neigh,
1085                .maxlen         = sizeof(int),
1086                .mode           = 0644,
1087                .proc_handler   = proc_dointvec_minmax,
1088                .extra1         = &zero,
1089                .extra2         = &one,
1090        },
1091        {
1092                .procname       = "fib_multipath_hash_policy",
1093                .data           = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
1094                .maxlen         = sizeof(int),
1095                .mode           = 0644,
1096                .proc_handler   = proc_dointvec_minmax,
1097                .extra1         = &zero,
1098                .extra2         = &one,
1099        },
1100#endif
1101        {
1102                .procname       = "ip_unprivileged_port_start",
1103                .maxlen         = sizeof(int),
1104                .data           = &init_net.ipv4.sysctl_ip_prot_sock,
1105                .mode           = 0644,
1106                .proc_handler   = ipv4_privileged_ports,
1107        },
1108#ifdef CONFIG_NET_L3_MASTER_DEV
1109        {
1110                .procname       = "udp_l3mdev_accept",
1111                .data           = &init_net.ipv4.sysctl_udp_l3mdev_accept,
1112                .maxlen         = sizeof(int),
1113                .mode           = 0644,
1114                .proc_handler   = proc_dointvec_minmax,
1115                .extra1         = &zero,
1116                .extra2         = &one,
1117        },
1118#endif
1119        { }
1120};
1121
1122static __net_init int ipv4_sysctl_init_net(struct net *net)
1123{
1124        struct ctl_table *table;
1125
1126        table = ipv4_net_table;
1127        if (!net_eq(net, &init_net)) {
1128                int i;
1129
1130                table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
1131                if (!table)
1132                        goto err_alloc;
1133
1134                /* Update the variables to point into the current struct net */
1135                for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
1136                        table[i].data += (void *)net - (void *)&init_net;
1137        }
1138
1139        net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
1140        if (!net->ipv4.ipv4_hdr)
1141                goto err_reg;
1142
1143        net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1144        if (!net->ipv4.sysctl_local_reserved_ports)
1145                goto err_ports;
1146
1147        return 0;
1148
1149err_ports:
1150        unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1151err_reg:
1152        if (!net_eq(net, &init_net))
1153                kfree(table);
1154err_alloc:
1155        return -ENOMEM;
1156}
1157
1158static __net_exit void ipv4_sysctl_exit_net(struct net *net)
1159{
1160        struct ctl_table *table;
1161
1162        kfree(net->ipv4.sysctl_local_reserved_ports);
1163        table = net->ipv4.ipv4_hdr->ctl_table_arg;
1164        unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1165        kfree(table);
1166}
1167
1168static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
1169        .init = ipv4_sysctl_init_net,
1170        .exit = ipv4_sysctl_exit_net,
1171};
1172
1173static __init int sysctl_ipv4_init(void)
1174{
1175        struct ctl_table_header *hdr;
1176
1177        hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
1178        if (!hdr)
1179                return -ENOMEM;
1180
1181        if (register_pernet_subsys(&ipv4_sysctl_ops)) {
1182                unregister_net_sysctl_table(hdr);
1183                return -ENOMEM;
1184        }
1185
1186        return 0;
1187}
1188
1189__initcall(sysctl_ipv4_init);
1190