linux/net/ipv4/sysctl_net_ipv4.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
   4 *
   5 * Begun April 1, 1996, Mike Shaver.
   6 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
   7 */
   8
   9#include <linux/mm.h>
  10#include <linux/module.h>
  11#include <linux/sysctl.h>
  12#include <linux/igmp.h>
  13#include <linux/inetdevice.h>
  14#include <linux/seqlock.h>
  15#include <linux/init.h>
  16#include <linux/slab.h>
  17#include <linux/nsproxy.h>
  18#include <linux/swap.h>
  19#include <net/snmp.h>
  20#include <net/icmp.h>
  21#include <net/ip.h>
  22#include <net/route.h>
  23#include <net/tcp.h>
  24#include <net/udp.h>
  25#include <net/cipso_ipv4.h>
  26#include <net/inet_frag.h>
  27#include <net/ping.h>
  28#include <net/protocol.h>
  29#include <net/netevent.h>
  30
  31static int zero;
  32static int one = 1;
  33static int four = 4;
  34static int thousand = 1000;
  35static int gso_max_segs = GSO_MAX_SEGS;
  36static int tcp_retr1_max = 255;
  37static int ip_local_port_range_min[] = { 1, 1 };
  38static int ip_local_port_range_max[] = { 65535, 65535 };
  39static int tcp_adv_win_scale_min = -31;
  40static int tcp_adv_win_scale_max = 31;
  41static int ip_privileged_port_min;
  42static int ip_privileged_port_max = 65535;
  43static int ip_ttl_min = 1;
  44static int ip_ttl_max = 255;
  45static int tcp_syn_retries_min = 1;
  46static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
  47static int ip_ping_group_range_min[] = { 0, 0 };
  48static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
  49
  50/* obsolete */
  51static int sysctl_tcp_low_latency __read_mostly;
  52
  53/* Update system visible IP port range */
  54static void set_local_port_range(struct net *net, int range[2])
  55{
  56        bool same_parity = !((range[0] ^ range[1]) & 1);
  57
  58        write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
  59        if (same_parity && !net->ipv4.ip_local_ports.warned) {
  60                net->ipv4.ip_local_ports.warned = true;
  61                pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
  62        }
  63        net->ipv4.ip_local_ports.range[0] = range[0];
  64        net->ipv4.ip_local_ports.range[1] = range[1];
  65        write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
  66}
  67
  68/* Validate changes from /proc interface. */
  69static int ipv4_local_port_range(struct ctl_table *table, int write,
  70                                 void __user *buffer,
  71                                 size_t *lenp, loff_t *ppos)
  72{
  73        struct net *net =
  74                container_of(table->data, struct net, ipv4.ip_local_ports.range);
  75        int ret;
  76        int range[2];
  77        struct ctl_table tmp = {
  78                .data = &range,
  79                .maxlen = sizeof(range),
  80                .mode = table->mode,
  81                .extra1 = &ip_local_port_range_min,
  82                .extra2 = &ip_local_port_range_max,
  83        };
  84
  85        inet_get_local_port_range(net, &range[0], &range[1]);
  86
  87        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
  88
  89        if (write && ret == 0) {
  90                /* Ensure that the upper limit is not smaller than the lower,
  91                 * and that the lower does not encroach upon the privileged
  92                 * port limit.
  93                 */
  94                if ((range[1] < range[0]) ||
  95                    (range[0] < net->ipv4.sysctl_ip_prot_sock))
  96                        ret = -EINVAL;
  97                else
  98                        set_local_port_range(net, range);
  99        }
 100
 101        return ret;
 102}
 103
 104/* Validate changes from /proc interface. */
 105static int ipv4_privileged_ports(struct ctl_table *table, int write,
 106                                void __user *buffer, size_t *lenp, loff_t *ppos)
 107{
 108        struct net *net = container_of(table->data, struct net,
 109            ipv4.sysctl_ip_prot_sock);
 110        int ret;
 111        int pports;
 112        int range[2];
 113        struct ctl_table tmp = {
 114                .data = &pports,
 115                .maxlen = sizeof(pports),
 116                .mode = table->mode,
 117                .extra1 = &ip_privileged_port_min,
 118                .extra2 = &ip_privileged_port_max,
 119        };
 120
 121        pports = net->ipv4.sysctl_ip_prot_sock;
 122
 123        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 124
 125        if (write && ret == 0) {
 126                inet_get_local_port_range(net, &range[0], &range[1]);
 127                /* Ensure that the local port range doesn't overlap with the
 128                 * privileged port range.
 129                 */
 130                if (range[0] < pports)
 131                        ret = -EINVAL;
 132                else
 133                        net->ipv4.sysctl_ip_prot_sock = pports;
 134        }
 135
 136        return ret;
 137}
 138
 139static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
 140{
 141        kgid_t *data = table->data;
 142        struct net *net =
 143                container_of(table->data, struct net, ipv4.ping_group_range.range);
 144        unsigned int seq;
 145        do {
 146                seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
 147
 148                *low = data[0];
 149                *high = data[1];
 150        } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
 151}
 152
 153/* Update system visible IP port range */
 154static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
 155{
 156        kgid_t *data = table->data;
 157        struct net *net =
 158                container_of(table->data, struct net, ipv4.ping_group_range.range);
 159        write_seqlock(&net->ipv4.ping_group_range.lock);
 160        data[0] = low;
 161        data[1] = high;
 162        write_sequnlock(&net->ipv4.ping_group_range.lock);
 163}
 164
 165/* Validate changes from /proc interface. */
 166static int ipv4_ping_group_range(struct ctl_table *table, int write,
 167                                 void __user *buffer,
 168                                 size_t *lenp, loff_t *ppos)
 169{
 170        struct user_namespace *user_ns = current_user_ns();
 171        int ret;
 172        gid_t urange[2];
 173        kgid_t low, high;
 174        struct ctl_table tmp = {
 175                .data = &urange,
 176                .maxlen = sizeof(urange),
 177                .mode = table->mode,
 178                .extra1 = &ip_ping_group_range_min,
 179                .extra2 = &ip_ping_group_range_max,
 180        };
 181
 182        inet_get_ping_group_range_table(table, &low, &high);
 183        urange[0] = from_kgid_munged(user_ns, low);
 184        urange[1] = from_kgid_munged(user_ns, high);
 185        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 186
 187        if (write && ret == 0) {
 188                low = make_kgid(user_ns, urange[0]);
 189                high = make_kgid(user_ns, urange[1]);
 190                if (!gid_valid(low) || !gid_valid(high) ||
 191                    (urange[1] < urange[0]) || gid_lt(high, low)) {
 192                        low = make_kgid(&init_user_ns, 1);
 193                        high = make_kgid(&init_user_ns, 0);
 194                }
 195                set_ping_group_range(table, low, high);
 196        }
 197
 198        return ret;
 199}
 200
 201static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 202                                       void __user *buffer, size_t *lenp, loff_t *ppos)
 203{
 204        struct net *net = container_of(ctl->data, struct net,
 205                                       ipv4.tcp_congestion_control);
 206        char val[TCP_CA_NAME_MAX];
 207        struct ctl_table tbl = {
 208                .data = val,
 209                .maxlen = TCP_CA_NAME_MAX,
 210        };
 211        int ret;
 212
 213        tcp_get_default_congestion_control(net, val);
 214
 215        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 216        if (write && ret == 0)
 217                ret = tcp_set_default_congestion_control(net, val);
 218        return ret;
 219}
 220
 221static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
 222                                                 int write,
 223                                                 void __user *buffer, size_t *lenp,
 224                                                 loff_t *ppos)
 225{
 226        struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
 227        int ret;
 228
 229        tbl.data = kmalloc(tbl.maxlen, GFP_USER);
 230        if (!tbl.data)
 231                return -ENOMEM;
 232        tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
 233        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 234        kfree(tbl.data);
 235        return ret;
 236}
 237
 238static int proc_allowed_congestion_control(struct ctl_table *ctl,
 239                                           int write,
 240                                           void __user *buffer, size_t *lenp,
 241                                           loff_t *ppos)
 242{
 243        struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
 244        int ret;
 245
 246        tbl.data = kmalloc(tbl.maxlen, GFP_USER);
 247        if (!tbl.data)
 248                return -ENOMEM;
 249
 250        tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
 251        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 252        if (write && ret == 0)
 253                ret = tcp_set_allowed_congestion_control(tbl.data);
 254        kfree(tbl.data);
 255        return ret;
 256}
 257
 258static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 259                                 void __user *buffer, size_t *lenp,
 260                                 loff_t *ppos)
 261{
 262        struct net *net = container_of(table->data, struct net,
 263            ipv4.sysctl_tcp_fastopen);
 264        struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
 265        struct tcp_fastopen_context *ctxt;
 266        int ret;
 267        u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
 268
 269        tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
 270        if (!tbl.data)
 271                return -ENOMEM;
 272
 273        rcu_read_lock();
 274        ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
 275        if (ctxt)
 276                memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
 277        else
 278                memset(user_key, 0, sizeof(user_key));
 279        rcu_read_unlock();
 280
 281        snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
 282                user_key[0], user_key[1], user_key[2], user_key[3]);
 283        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 284
 285        if (write && ret == 0) {
 286                if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
 287                           user_key + 2, user_key + 3) != 4) {
 288                        ret = -EINVAL;
 289                        goto bad_key;
 290                }
 291                tcp_fastopen_reset_cipher(net, NULL, user_key,
 292                                          TCP_FASTOPEN_KEY_LENGTH);
 293        }
 294
 295bad_key:
 296        pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
 297               user_key[0], user_key[1], user_key[2], user_key[3],
 298               (char *)tbl.data, ret);
 299        kfree(tbl.data);
 300        return ret;
 301}
 302
 303static void proc_configure_early_demux(int enabled, int protocol)
 304{
 305        struct net_protocol *ipprot;
 306#if IS_ENABLED(CONFIG_IPV6)
 307        struct inet6_protocol *ip6prot;
 308#endif
 309
 310        rcu_read_lock();
 311
 312        ipprot = rcu_dereference(inet_protos[protocol]);
 313        if (ipprot)
 314                ipprot->early_demux = enabled ? ipprot->early_demux_handler :
 315                                                NULL;
 316
 317#if IS_ENABLED(CONFIG_IPV6)
 318        ip6prot = rcu_dereference(inet6_protos[protocol]);
 319        if (ip6prot)
 320                ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
 321                                                 NULL;
 322#endif
 323        rcu_read_unlock();
 324}
 325
 326static int proc_tcp_early_demux(struct ctl_table *table, int write,
 327                                void __user *buffer, size_t *lenp, loff_t *ppos)
 328{
 329        int ret = 0;
 330
 331        ret = proc_dointvec(table, write, buffer, lenp, ppos);
 332
 333        if (write && !ret) {
 334                int enabled = init_net.ipv4.sysctl_tcp_early_demux;
 335
 336                proc_configure_early_demux(enabled, IPPROTO_TCP);
 337        }
 338
 339        return ret;
 340}
 341
 342static int proc_udp_early_demux(struct ctl_table *table, int write,
 343                                void __user *buffer, size_t *lenp, loff_t *ppos)
 344{
 345        int ret = 0;
 346
 347        ret = proc_dointvec(table, write, buffer, lenp, ppos);
 348
 349        if (write && !ret) {
 350                int enabled = init_net.ipv4.sysctl_udp_early_demux;
 351
 352                proc_configure_early_demux(enabled, IPPROTO_UDP);
 353        }
 354
 355        return ret;
 356}
 357
 358static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
 359                                             int write,
 360                                             void __user *buffer,
 361                                             size_t *lenp, loff_t *ppos)
 362{
 363        struct net *net = container_of(table->data, struct net,
 364            ipv4.sysctl_tcp_fastopen_blackhole_timeout);
 365        int ret;
 366
 367        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 368        if (write && ret == 0)
 369                atomic_set(&net->ipv4.tfo_active_disable_times, 0);
 370
 371        return ret;
 372}
 373
 374static int proc_tcp_available_ulp(struct ctl_table *ctl,
 375                                  int write,
 376                                  void __user *buffer, size_t *lenp,
 377                                  loff_t *ppos)
 378{
 379        struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, };
 380        int ret;
 381
 382        tbl.data = kmalloc(tbl.maxlen, GFP_USER);
 383        if (!tbl.data)
 384                return -ENOMEM;
 385        tcp_get_available_ulp(tbl.data, TCP_ULP_BUF_MAX);
 386        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 387        kfree(tbl.data);
 388
 389        return ret;
 390}
 391
 392#ifdef CONFIG_IP_ROUTE_MULTIPATH
 393static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 394                                          void __user *buffer, size_t *lenp,
 395                                          loff_t *ppos)
 396{
 397        struct net *net = container_of(table->data, struct net,
 398            ipv4.sysctl_fib_multipath_hash_policy);
 399        int ret;
 400
 401        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 402        if (write && ret == 0)
 403                call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
 404
 405        return ret;
 406}
 407#endif
 408
 409static struct ctl_table ipv4_table[] = {
 410        {
 411                .procname       = "tcp_max_orphans",
 412                .data           = &sysctl_tcp_max_orphans,
 413                .maxlen         = sizeof(int),
 414                .mode           = 0644,
 415                .proc_handler   = proc_dointvec
 416        },
 417        {
 418                .procname       = "inet_peer_threshold",
 419                .data           = &inet_peer_threshold,
 420                .maxlen         = sizeof(int),
 421                .mode           = 0644,
 422                .proc_handler   = proc_dointvec
 423        },
 424        {
 425                .procname       = "inet_peer_minttl",
 426                .data           = &inet_peer_minttl,
 427                .maxlen         = sizeof(int),
 428                .mode           = 0644,
 429                .proc_handler   = proc_dointvec_jiffies,
 430        },
 431        {
 432                .procname       = "inet_peer_maxttl",
 433                .data           = &inet_peer_maxttl,
 434                .maxlen         = sizeof(int),
 435                .mode           = 0644,
 436                .proc_handler   = proc_dointvec_jiffies,
 437        },
 438        {
 439                .procname       = "tcp_mem",
 440                .maxlen         = sizeof(sysctl_tcp_mem),
 441                .data           = &sysctl_tcp_mem,
 442                .mode           = 0644,
 443                .proc_handler   = proc_doulongvec_minmax,
 444        },
 445        {
 446                .procname       = "tcp_low_latency",
 447                .data           = &sysctl_tcp_low_latency,
 448                .maxlen         = sizeof(int),
 449                .mode           = 0644,
 450                .proc_handler   = proc_dointvec
 451        },
 452#ifdef CONFIG_NETLABEL
 453        {
 454                .procname       = "cipso_cache_enable",
 455                .data           = &cipso_v4_cache_enabled,
 456                .maxlen         = sizeof(int),
 457                .mode           = 0644,
 458                .proc_handler   = proc_dointvec,
 459        },
 460        {
 461                .procname       = "cipso_cache_bucket_size",
 462                .data           = &cipso_v4_cache_bucketsize,
 463                .maxlen         = sizeof(int),
 464                .mode           = 0644,
 465                .proc_handler   = proc_dointvec,
 466        },
 467        {
 468                .procname       = "cipso_rbm_optfmt",
 469                .data           = &cipso_v4_rbm_optfmt,
 470                .maxlen         = sizeof(int),
 471                .mode           = 0644,
 472                .proc_handler   = proc_dointvec,
 473        },
 474        {
 475                .procname       = "cipso_rbm_strictvalid",
 476                .data           = &cipso_v4_rbm_strictvalid,
 477                .maxlen         = sizeof(int),
 478                .mode           = 0644,
 479                .proc_handler   = proc_dointvec,
 480        },
 481#endif /* CONFIG_NETLABEL */
 482        {
 483                .procname       = "tcp_available_congestion_control",
 484                .maxlen         = TCP_CA_BUF_MAX,
 485                .mode           = 0444,
 486                .proc_handler   = proc_tcp_available_congestion_control,
 487        },
 488        {
 489                .procname       = "tcp_allowed_congestion_control",
 490                .maxlen         = TCP_CA_BUF_MAX,
 491                .mode           = 0644,
 492                .proc_handler   = proc_allowed_congestion_control,
 493        },
 494        {
 495                .procname       = "tcp_available_ulp",
 496                .maxlen         = TCP_ULP_BUF_MAX,
 497                .mode           = 0444,
 498                .proc_handler   = proc_tcp_available_ulp,
 499        },
 500        {
 501                .procname       = "icmp_msgs_per_sec",
 502                .data           = &sysctl_icmp_msgs_per_sec,
 503                .maxlen         = sizeof(int),
 504                .mode           = 0644,
 505                .proc_handler   = proc_dointvec_minmax,
 506                .extra1         = &zero,
 507        },
 508        {
 509                .procname       = "icmp_msgs_burst",
 510                .data           = &sysctl_icmp_msgs_burst,
 511                .maxlen         = sizeof(int),
 512                .mode           = 0644,
 513                .proc_handler   = proc_dointvec_minmax,
 514                .extra1         = &zero,
 515        },
 516        {
 517                .procname       = "udp_mem",
 518                .data           = &sysctl_udp_mem,
 519                .maxlen         = sizeof(sysctl_udp_mem),
 520                .mode           = 0644,
 521                .proc_handler   = proc_doulongvec_minmax,
 522        },
 523        {
 524                .procname       = "udp_rmem_min",
 525                .data           = &sysctl_udp_rmem_min,
 526                .maxlen         = sizeof(sysctl_udp_rmem_min),
 527                .mode           = 0644,
 528                .proc_handler   = proc_dointvec_minmax,
 529                .extra1         = &one
 530        },
 531        {
 532                .procname       = "udp_wmem_min",
 533                .data           = &sysctl_udp_wmem_min,
 534                .maxlen         = sizeof(sysctl_udp_wmem_min),
 535                .mode           = 0644,
 536                .proc_handler   = proc_dointvec_minmax,
 537                .extra1         = &one
 538        },
 539        { }
 540};
 541
 542static struct ctl_table ipv4_net_table[] = {
 543        {
 544                .procname       = "icmp_echo_ignore_all",
 545                .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
 546                .maxlen         = sizeof(int),
 547                .mode           = 0644,
 548                .proc_handler   = proc_dointvec
 549        },
 550        {
 551                .procname       = "icmp_echo_ignore_broadcasts",
 552                .data           = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
 553                .maxlen         = sizeof(int),
 554                .mode           = 0644,
 555                .proc_handler   = proc_dointvec
 556        },
 557        {
 558                .procname       = "icmp_ignore_bogus_error_responses",
 559                .data           = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
 560                .maxlen         = sizeof(int),
 561                .mode           = 0644,
 562                .proc_handler   = proc_dointvec
 563        },
 564        {
 565                .procname       = "icmp_errors_use_inbound_ifaddr",
 566                .data           = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
 567                .maxlen         = sizeof(int),
 568                .mode           = 0644,
 569                .proc_handler   = proc_dointvec
 570        },
 571        {
 572                .procname       = "icmp_ratelimit",
 573                .data           = &init_net.ipv4.sysctl_icmp_ratelimit,
 574                .maxlen         = sizeof(int),
 575                .mode           = 0644,
 576                .proc_handler   = proc_dointvec_ms_jiffies,
 577        },
 578        {
 579                .procname       = "icmp_ratemask",
 580                .data           = &init_net.ipv4.sysctl_icmp_ratemask,
 581                .maxlen         = sizeof(int),
 582                .mode           = 0644,
 583                .proc_handler   = proc_dointvec
 584        },
 585        {
 586                .procname       = "ping_group_range",
 587                .data           = &init_net.ipv4.ping_group_range.range,
 588                .maxlen         = sizeof(gid_t)*2,
 589                .mode           = 0644,
 590                .proc_handler   = ipv4_ping_group_range,
 591        },
 592        {
 593                .procname       = "tcp_ecn",
 594                .data           = &init_net.ipv4.sysctl_tcp_ecn,
 595                .maxlen         = sizeof(int),
 596                .mode           = 0644,
 597                .proc_handler   = proc_dointvec
 598        },
 599        {
 600                .procname       = "tcp_ecn_fallback",
 601                .data           = &init_net.ipv4.sysctl_tcp_ecn_fallback,
 602                .maxlen         = sizeof(int),
 603                .mode           = 0644,
 604                .proc_handler   = proc_dointvec
 605        },
 606        {
 607                .procname       = "ip_dynaddr",
 608                .data           = &init_net.ipv4.sysctl_ip_dynaddr,
 609                .maxlen         = sizeof(int),
 610                .mode           = 0644,
 611                .proc_handler   = proc_dointvec
 612        },
 613        {
 614                .procname       = "ip_early_demux",
 615                .data           = &init_net.ipv4.sysctl_ip_early_demux,
 616                .maxlen         = sizeof(int),
 617                .mode           = 0644,
 618                .proc_handler   = proc_dointvec
 619        },
 620        {
 621                .procname       = "udp_early_demux",
 622                .data           = &init_net.ipv4.sysctl_udp_early_demux,
 623                .maxlen         = sizeof(int),
 624                .mode           = 0644,
 625                .proc_handler   = proc_udp_early_demux
 626        },
 627        {
 628                .procname       = "tcp_early_demux",
 629                .data           = &init_net.ipv4.sysctl_tcp_early_demux,
 630                .maxlen         = sizeof(int),
 631                .mode           = 0644,
 632                .proc_handler   = proc_tcp_early_demux
 633        },
 634        {
 635                .procname       = "ip_default_ttl",
 636                .data           = &init_net.ipv4.sysctl_ip_default_ttl,
 637                .maxlen         = sizeof(int),
 638                .mode           = 0644,
 639                .proc_handler   = proc_dointvec_minmax,
 640                .extra1         = &ip_ttl_min,
 641                .extra2         = &ip_ttl_max,
 642        },
 643        {
 644                .procname       = "ip_local_port_range",
 645                .maxlen         = sizeof(init_net.ipv4.ip_local_ports.range),
 646                .data           = &init_net.ipv4.ip_local_ports.range,
 647                .mode           = 0644,
 648                .proc_handler   = ipv4_local_port_range,
 649        },
 650        {
 651                .procname       = "ip_local_reserved_ports",
 652                .data           = &init_net.ipv4.sysctl_local_reserved_ports,
 653                .maxlen         = 65536,
 654                .mode           = 0644,
 655                .proc_handler   = proc_do_large_bitmap,
 656        },
 657        {
 658                .procname       = "ip_no_pmtu_disc",
 659                .data           = &init_net.ipv4.sysctl_ip_no_pmtu_disc,
 660                .maxlen         = sizeof(int),
 661                .mode           = 0644,
 662                .proc_handler   = proc_dointvec
 663        },
 664        {
 665                .procname       = "ip_forward_use_pmtu",
 666                .data           = &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
 667                .maxlen         = sizeof(int),
 668                .mode           = 0644,
 669                .proc_handler   = proc_dointvec,
 670        },
 671        {
 672                .procname       = "ip_nonlocal_bind",
 673                .data           = &init_net.ipv4.sysctl_ip_nonlocal_bind,
 674                .maxlen         = sizeof(int),
 675                .mode           = 0644,
 676                .proc_handler   = proc_dointvec
 677        },
 678        {
 679                .procname       = "fwmark_reflect",
 680                .data           = &init_net.ipv4.sysctl_fwmark_reflect,
 681                .maxlen         = sizeof(int),
 682                .mode           = 0644,
 683                .proc_handler   = proc_dointvec,
 684        },
 685        {
 686                .procname       = "tcp_fwmark_accept",
 687                .data           = &init_net.ipv4.sysctl_tcp_fwmark_accept,
 688                .maxlen         = sizeof(int),
 689                .mode           = 0644,
 690                .proc_handler   = proc_dointvec,
 691        },
 692#ifdef CONFIG_NET_L3_MASTER_DEV
 693        {
 694                .procname       = "tcp_l3mdev_accept",
 695                .data           = &init_net.ipv4.sysctl_tcp_l3mdev_accept,
 696                .maxlen         = sizeof(int),
 697                .mode           = 0644,
 698                .proc_handler   = proc_dointvec_minmax,
 699                .extra1         = &zero,
 700                .extra2         = &one,
 701        },
 702#endif
 703        {
 704                .procname       = "tcp_mtu_probing",
 705                .data           = &init_net.ipv4.sysctl_tcp_mtu_probing,
 706                .maxlen         = sizeof(int),
 707                .mode           = 0644,
 708                .proc_handler   = proc_dointvec,
 709        },
 710        {
 711                .procname       = "tcp_base_mss",
 712                .data           = &init_net.ipv4.sysctl_tcp_base_mss,
 713                .maxlen         = sizeof(int),
 714                .mode           = 0644,
 715                .proc_handler   = proc_dointvec,
 716        },
 717        {
 718                .procname       = "tcp_probe_threshold",
 719                .data           = &init_net.ipv4.sysctl_tcp_probe_threshold,
 720                .maxlen         = sizeof(int),
 721                .mode           = 0644,
 722                .proc_handler   = proc_dointvec,
 723        },
 724        {
 725                .procname       = "tcp_probe_interval",
 726                .data           = &init_net.ipv4.sysctl_tcp_probe_interval,
 727                .maxlen         = sizeof(int),
 728                .mode           = 0644,
 729                .proc_handler   = proc_dointvec,
 730        },
 731        {
 732                .procname       = "igmp_link_local_mcast_reports",
 733                .data           = &init_net.ipv4.sysctl_igmp_llm_reports,
 734                .maxlen         = sizeof(int),
 735                .mode           = 0644,
 736                .proc_handler   = proc_dointvec
 737        },
 738        {
 739                .procname       = "igmp_max_memberships",
 740                .data           = &init_net.ipv4.sysctl_igmp_max_memberships,
 741                .maxlen         = sizeof(int),
 742                .mode           = 0644,
 743                .proc_handler   = proc_dointvec
 744        },
 745        {
 746                .procname       = "igmp_max_msf",
 747                .data           = &init_net.ipv4.sysctl_igmp_max_msf,
 748                .maxlen         = sizeof(int),
 749                .mode           = 0644,
 750                .proc_handler   = proc_dointvec
 751        },
 752#ifdef CONFIG_IP_MULTICAST
 753        {
 754                .procname       = "igmp_qrv",
 755                .data           = &init_net.ipv4.sysctl_igmp_qrv,
 756                .maxlen         = sizeof(int),
 757                .mode           = 0644,
 758                .proc_handler   = proc_dointvec_minmax,
 759                .extra1         = &one
 760        },
 761#endif
 762        {
 763                .procname       = "tcp_congestion_control",
 764                .data           = &init_net.ipv4.tcp_congestion_control,
 765                .mode           = 0644,
 766                .maxlen         = TCP_CA_NAME_MAX,
 767                .proc_handler   = proc_tcp_congestion_control,
 768        },
 769        {
 770                .procname       = "tcp_keepalive_time",
 771                .data           = &init_net.ipv4.sysctl_tcp_keepalive_time,
 772                .maxlen         = sizeof(int),
 773                .mode           = 0644,
 774                .proc_handler   = proc_dointvec_jiffies,
 775        },
 776        {
 777                .procname       = "tcp_keepalive_probes",
 778                .data           = &init_net.ipv4.sysctl_tcp_keepalive_probes,
 779                .maxlen         = sizeof(int),
 780                .mode           = 0644,
 781                .proc_handler   = proc_dointvec
 782        },
 783        {
 784                .procname       = "tcp_keepalive_intvl",
 785                .data           = &init_net.ipv4.sysctl_tcp_keepalive_intvl,
 786                .maxlen         = sizeof(int),
 787                .mode           = 0644,
 788                .proc_handler   = proc_dointvec_jiffies,
 789        },
 790        {
 791                .procname       = "tcp_syn_retries",
 792                .data           = &init_net.ipv4.sysctl_tcp_syn_retries,
 793                .maxlen         = sizeof(int),
 794                .mode           = 0644,
 795                .proc_handler   = proc_dointvec_minmax,
 796                .extra1         = &tcp_syn_retries_min,
 797                .extra2         = &tcp_syn_retries_max
 798        },
 799        {
 800                .procname       = "tcp_synack_retries",
 801                .data           = &init_net.ipv4.sysctl_tcp_synack_retries,
 802                .maxlen         = sizeof(int),
 803                .mode           = 0644,
 804                .proc_handler   = proc_dointvec
 805        },
 806#ifdef CONFIG_SYN_COOKIES
 807        {
 808                .procname       = "tcp_syncookies",
 809                .data           = &init_net.ipv4.sysctl_tcp_syncookies,
 810                .maxlen         = sizeof(int),
 811                .mode           = 0644,
 812                .proc_handler   = proc_dointvec
 813        },
 814#endif
 815        {
 816                .procname       = "tcp_reordering",
 817                .data           = &init_net.ipv4.sysctl_tcp_reordering,
 818                .maxlen         = sizeof(int),
 819                .mode           = 0644,
 820                .proc_handler   = proc_dointvec
 821        },
 822        {
 823                .procname       = "tcp_retries1",
 824                .data           = &init_net.ipv4.sysctl_tcp_retries1,
 825                .maxlen         = sizeof(int),
 826                .mode           = 0644,
 827                .proc_handler   = proc_dointvec_minmax,
 828                .extra2         = &tcp_retr1_max
 829        },
 830        {
 831                .procname       = "tcp_retries2",
 832                .data           = &init_net.ipv4.sysctl_tcp_retries2,
 833                .maxlen         = sizeof(int),
 834                .mode           = 0644,
 835                .proc_handler   = proc_dointvec
 836        },
 837        {
 838                .procname       = "tcp_orphan_retries",
 839                .data           = &init_net.ipv4.sysctl_tcp_orphan_retries,
 840                .maxlen         = sizeof(int),
 841                .mode           = 0644,
 842                .proc_handler   = proc_dointvec
 843        },
 844        {
 845                .procname       = "tcp_fin_timeout",
 846                .data           = &init_net.ipv4.sysctl_tcp_fin_timeout,
 847                .maxlen         = sizeof(int),
 848                .mode           = 0644,
 849                .proc_handler   = proc_dointvec_jiffies,
 850        },
 851        {
 852                .procname       = "tcp_notsent_lowat",
 853                .data           = &init_net.ipv4.sysctl_tcp_notsent_lowat,
 854                .maxlen         = sizeof(unsigned int),
 855                .mode           = 0644,
 856                .proc_handler   = proc_douintvec,
 857        },
 858        {
 859                .procname       = "tcp_tw_reuse",
 860                .data           = &init_net.ipv4.sysctl_tcp_tw_reuse,
 861                .maxlen         = sizeof(int),
 862                .mode           = 0644,
 863                .proc_handler   = proc_dointvec
 864        },
 865        {
 866                .procname       = "tcp_max_tw_buckets",
 867                .data           = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
 868                .maxlen         = sizeof(int),
 869                .mode           = 0644,
 870                .proc_handler   = proc_dointvec
 871        },
 872        {
 873                .procname       = "tcp_max_syn_backlog",
 874                .data           = &init_net.ipv4.sysctl_max_syn_backlog,
 875                .maxlen         = sizeof(int),
 876                .mode           = 0644,
 877                .proc_handler   = proc_dointvec
 878        },
 879        {
 880                .procname       = "tcp_fastopen",
 881                .data           = &init_net.ipv4.sysctl_tcp_fastopen,
 882                .maxlen         = sizeof(int),
 883                .mode           = 0644,
 884                .proc_handler   = proc_dointvec,
 885        },
 886        {
 887                .procname       = "tcp_fastopen_key",
 888                .mode           = 0600,
 889                .data           = &init_net.ipv4.sysctl_tcp_fastopen,
 890                .maxlen         = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
 891                .proc_handler   = proc_tcp_fastopen_key,
 892        },
 893        {
 894                .procname       = "tcp_fastopen_blackhole_timeout_sec",
 895                .data           = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
 896                .maxlen         = sizeof(int),
 897                .mode           = 0644,
 898                .proc_handler   = proc_tfo_blackhole_detect_timeout,
 899                .extra1         = &zero,
 900        },
 901#ifdef CONFIG_IP_ROUTE_MULTIPATH
 902        {
 903                .procname       = "fib_multipath_use_neigh",
 904                .data           = &init_net.ipv4.sysctl_fib_multipath_use_neigh,
 905                .maxlen         = sizeof(int),
 906                .mode           = 0644,
 907                .proc_handler   = proc_dointvec_minmax,
 908                .extra1         = &zero,
 909                .extra2         = &one,
 910        },
 911        {
 912                .procname       = "fib_multipath_hash_policy",
 913                .data           = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
 914                .maxlen         = sizeof(int),
 915                .mode           = 0644,
 916                .proc_handler   = proc_fib_multipath_hash_policy,
 917                .extra1         = &zero,
 918                .extra2         = &one,
 919        },
 920#endif
 921        {
 922                .procname       = "ip_unprivileged_port_start",
 923                .maxlen         = sizeof(int),
 924                .data           = &init_net.ipv4.sysctl_ip_prot_sock,
 925                .mode           = 0644,
 926                .proc_handler   = ipv4_privileged_ports,
 927        },
 928#ifdef CONFIG_NET_L3_MASTER_DEV
 929        {
 930                .procname       = "udp_l3mdev_accept",
 931                .data           = &init_net.ipv4.sysctl_udp_l3mdev_accept,
 932                .maxlen         = sizeof(int),
 933                .mode           = 0644,
 934                .proc_handler   = proc_dointvec_minmax,
 935                .extra1         = &zero,
 936                .extra2         = &one,
 937        },
 938#endif
 939        {
 940                .procname       = "tcp_sack",
 941                .data           = &init_net.ipv4.sysctl_tcp_sack,
 942                .maxlen         = sizeof(int),
 943                .mode           = 0644,
 944                .proc_handler   = proc_dointvec
 945        },
 946        {
 947                .procname       = "tcp_window_scaling",
 948                .data           = &init_net.ipv4.sysctl_tcp_window_scaling,
 949                .maxlen         = sizeof(int),
 950                .mode           = 0644,
 951                .proc_handler   = proc_dointvec
 952        },
 953        {
 954                .procname       = "tcp_timestamps",
 955                .data           = &init_net.ipv4.sysctl_tcp_timestamps,
 956                .maxlen         = sizeof(int),
 957                .mode           = 0644,
 958                .proc_handler   = proc_dointvec
 959        },
 960        {
 961                .procname       = "tcp_early_retrans",
 962                .data           = &init_net.ipv4.sysctl_tcp_early_retrans,
 963                .maxlen         = sizeof(int),
 964                .mode           = 0644,
 965                .proc_handler   = proc_dointvec_minmax,
 966                .extra1         = &zero,
 967                .extra2         = &four,
 968        },
 969        {
 970                .procname       = "tcp_recovery",
 971                .data           = &init_net.ipv4.sysctl_tcp_recovery,
 972                .maxlen         = sizeof(int),
 973                .mode           = 0644,
 974                .proc_handler   = proc_dointvec,
 975        },
 976        {
 977                .procname       = "tcp_thin_linear_timeouts",
 978                .data           = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
 979                .maxlen         = sizeof(int),
 980                .mode           = 0644,
 981                .proc_handler   = proc_dointvec
 982        },
 983        {
 984                .procname       = "tcp_slow_start_after_idle",
 985                .data           = &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
 986                .maxlen         = sizeof(int),
 987                .mode           = 0644,
 988                .proc_handler   = proc_dointvec
 989        },
 990        {
 991                .procname       = "tcp_retrans_collapse",
 992                .data           = &init_net.ipv4.sysctl_tcp_retrans_collapse,
 993                .maxlen         = sizeof(int),
 994                .mode           = 0644,
 995                .proc_handler   = proc_dointvec
 996        },
 997        {
 998                .procname       = "tcp_stdurg",
 999                .data           = &init_net.ipv4.sysctl_tcp_stdurg,
1000                .maxlen         = sizeof(int),
1001                .mode           = 0644,
1002                .proc_handler   = proc_dointvec
1003        },
1004        {
1005                .procname       = "tcp_rfc1337",
1006                .data           = &init_net.ipv4.sysctl_tcp_rfc1337,
1007                .maxlen         = sizeof(int),
1008                .mode           = 0644,
1009                .proc_handler   = proc_dointvec
1010        },
1011        {
1012                .procname       = "tcp_abort_on_overflow",
1013                .data           = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
1014                .maxlen         = sizeof(int),
1015                .mode           = 0644,
1016                .proc_handler   = proc_dointvec
1017        },
1018        {
1019                .procname       = "tcp_fack",
1020                .data           = &init_net.ipv4.sysctl_tcp_fack,
1021                .maxlen         = sizeof(int),
1022                .mode           = 0644,
1023                .proc_handler   = proc_dointvec
1024        },
1025        {
1026                .procname       = "tcp_max_reordering",
1027                .data           = &init_net.ipv4.sysctl_tcp_max_reordering,
1028                .maxlen         = sizeof(int),
1029                .mode           = 0644,
1030                .proc_handler   = proc_dointvec
1031        },
1032        {
1033                .procname       = "tcp_dsack",
1034                .data           = &init_net.ipv4.sysctl_tcp_dsack,
1035                .maxlen         = sizeof(int),
1036                .mode           = 0644,
1037                .proc_handler   = proc_dointvec
1038        },
1039        {
1040                .procname       = "tcp_app_win",
1041                .data           = &init_net.ipv4.sysctl_tcp_app_win,
1042                .maxlen         = sizeof(int),
1043                .mode           = 0644,
1044                .proc_handler   = proc_dointvec
1045        },
1046        {
1047                .procname       = "tcp_adv_win_scale",
1048                .data           = &init_net.ipv4.sysctl_tcp_adv_win_scale,
1049                .maxlen         = sizeof(int),
1050                .mode           = 0644,
1051                .proc_handler   = proc_dointvec_minmax,
1052                .extra1         = &tcp_adv_win_scale_min,
1053                .extra2         = &tcp_adv_win_scale_max,
1054        },
1055        {
1056                .procname       = "tcp_frto",
1057                .data           = &init_net.ipv4.sysctl_tcp_frto,
1058                .maxlen         = sizeof(int),
1059                .mode           = 0644,
1060                .proc_handler   = proc_dointvec
1061        },
1062        {
1063                .procname       = "tcp_no_metrics_save",
1064                .data           = &init_net.ipv4.sysctl_tcp_nometrics_save,
1065                .maxlen         = sizeof(int),
1066                .mode           = 0644,
1067                .proc_handler   = proc_dointvec,
1068        },
1069        {
1070                .procname       = "tcp_moderate_rcvbuf",
1071                .data           = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
1072                .maxlen         = sizeof(int),
1073                .mode           = 0644,
1074                .proc_handler   = proc_dointvec,
1075        },
1076        {
1077                .procname       = "tcp_tso_win_divisor",
1078                .data           = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
1079                .maxlen         = sizeof(int),
1080                .mode           = 0644,
1081                .proc_handler   = proc_dointvec,
1082        },
1083        {
1084                .procname       = "tcp_workaround_signed_windows",
1085                .data           = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
1086                .maxlen         = sizeof(int),
1087                .mode           = 0644,
1088                .proc_handler   = proc_dointvec
1089        },
1090        {
1091                .procname       = "tcp_limit_output_bytes",
1092                .data           = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
1093                .maxlen         = sizeof(int),
1094                .mode           = 0644,
1095                .proc_handler   = proc_dointvec
1096        },
1097        {
1098                .procname       = "tcp_challenge_ack_limit",
1099                .data           = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
1100                .maxlen         = sizeof(int),
1101                .mode           = 0644,
1102                .proc_handler   = proc_dointvec
1103        },
1104        {
1105                .procname       = "tcp_min_tso_segs",
1106                .data           = &init_net.ipv4.sysctl_tcp_min_tso_segs,
1107                .maxlen         = sizeof(int),
1108                .mode           = 0644,
1109                .proc_handler   = proc_dointvec_minmax,
1110                .extra1         = &one,
1111                .extra2         = &gso_max_segs,
1112        },
1113        {
1114                .procname       = "tcp_min_rtt_wlen",
1115                .data           = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
1116                .maxlen         = sizeof(int),
1117                .mode           = 0644,
1118                .proc_handler   = proc_dointvec
1119        },
1120        {
1121                .procname       = "tcp_autocorking",
1122                .data           = &init_net.ipv4.sysctl_tcp_autocorking,
1123                .maxlen         = sizeof(int),
1124                .mode           = 0644,
1125                .proc_handler   = proc_dointvec_minmax,
1126                .extra1         = &zero,
1127                .extra2         = &one,
1128        },
1129        {
1130                .procname       = "tcp_invalid_ratelimit",
1131                .data           = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
1132                .maxlen         = sizeof(int),
1133                .mode           = 0644,
1134                .proc_handler   = proc_dointvec_ms_jiffies,
1135        },
1136        {
1137                .procname       = "tcp_pacing_ss_ratio",
1138                .data           = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
1139                .maxlen         = sizeof(int),
1140                .mode           = 0644,
1141                .proc_handler   = proc_dointvec_minmax,
1142                .extra1         = &zero,
1143                .extra2         = &thousand,
1144        },
1145        {
1146                .procname       = "tcp_pacing_ca_ratio",
1147                .data           = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
1148                .maxlen         = sizeof(int),
1149                .mode           = 0644,
1150                .proc_handler   = proc_dointvec_minmax,
1151                .extra1         = &zero,
1152                .extra2         = &thousand,
1153        },
1154        {
1155                .procname       = "tcp_wmem",
1156                .data           = &init_net.ipv4.sysctl_tcp_wmem,
1157                .maxlen         = sizeof(init_net.ipv4.sysctl_tcp_wmem),
1158                .mode           = 0644,
1159                .proc_handler   = proc_dointvec_minmax,
1160                .extra1         = &one,
1161        },
1162        {
1163                .procname       = "tcp_rmem",
1164                .data           = &init_net.ipv4.sysctl_tcp_rmem,
1165                .maxlen         = sizeof(init_net.ipv4.sysctl_tcp_rmem),
1166                .mode           = 0644,
1167                .proc_handler   = proc_dointvec_minmax,
1168                .extra1         = &one,
1169        },
1170        { }
1171};
1172
1173static __net_init int ipv4_sysctl_init_net(struct net *net)
1174{
1175        struct ctl_table *table;
1176
1177        table = ipv4_net_table;
1178        if (!net_eq(net, &init_net)) {
1179                int i;
1180
1181                table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
1182                if (!table)
1183                        goto err_alloc;
1184
1185                /* Update the variables to point into the current struct net */
1186                for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++)
1187                        table[i].data += (void *)net - (void *)&init_net;
1188        }
1189
1190        net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
1191        if (!net->ipv4.ipv4_hdr)
1192                goto err_reg;
1193
1194        net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1195        if (!net->ipv4.sysctl_local_reserved_ports)
1196                goto err_ports;
1197
1198        return 0;
1199
1200err_ports:
1201        unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1202err_reg:
1203        if (!net_eq(net, &init_net))
1204                kfree(table);
1205err_alloc:
1206        return -ENOMEM;
1207}
1208
1209static __net_exit void ipv4_sysctl_exit_net(struct net *net)
1210{
1211        struct ctl_table *table;
1212
1213        kfree(net->ipv4.sysctl_local_reserved_ports);
1214        table = net->ipv4.ipv4_hdr->ctl_table_arg;
1215        unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
1216        kfree(table);
1217}
1218
1219static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
1220        .init = ipv4_sysctl_init_net,
1221        .exit = ipv4_sysctl_exit_net,
1222};
1223
1224static __init int sysctl_ipv4_init(void)
1225{
1226        struct ctl_table_header *hdr;
1227
1228        hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
1229        if (!hdr)
1230                return -ENOMEM;
1231
1232        if (register_pernet_subsys(&ipv4_sysctl_ops)) {
1233                unregister_net_sysctl_table(hdr);
1234                return -ENOMEM;
1235        }
1236
1237        return 0;
1238}
1239
1240__initcall(sysctl_ipv4_init);
1241