linux/net/core/sysctl_net_core.c
<<
>>
Prefs
   1/* -*- linux-c -*-
   2 * sysctl_net_core.c: sysctl interface to net core subsystem.
   3 *
   4 * Begun April 1, 1996, Mike Shaver.
   5 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
   6 */
   7
   8#include <linux/mm.h>
   9#include <linux/sysctl.h>
  10#include <linux/module.h>
  11#include <linux/socket.h>
  12#include <linux/netdevice.h>
  13#include <linux/ratelimit.h>
  14#include <linux/vmalloc.h>
  15#include <linux/init.h>
  16#include <linux/slab.h>
  17#include <linux/kmemleak.h>
  18
  19#include <net/ip.h>
  20#include <net/sock.h>
  21#include <net/net_ratelimit.h>
  22#include <net/busy_poll.h>
  23#include <net/pkt_sched.h>
  24
  25static int zero = 0;
  26static int one = 1;
  27static int min_sndbuf = SOCK_MIN_SNDBUF;
  28static int min_rcvbuf = SOCK_MIN_RCVBUF;
  29static int max_skb_frags = MAX_SKB_FRAGS;
  30
  31static int net_msg_warn;        /* Unused, but still a sysctl */
  32
  33#ifdef CONFIG_RPS
  34static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
  35                                void __user *buffer, size_t *lenp, loff_t *ppos)
  36{
  37        unsigned int orig_size, size;
  38        int ret, i;
  39        struct ctl_table tmp = {
  40                .data = &size,
  41                .maxlen = sizeof(size),
  42                .mode = table->mode
  43        };
  44        struct rps_sock_flow_table *orig_sock_table, *sock_table;
  45        static DEFINE_MUTEX(sock_flow_mutex);
  46
  47        mutex_lock(&sock_flow_mutex);
  48
  49        orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
  50                                        lockdep_is_held(&sock_flow_mutex));
  51        size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
  52
  53        ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
  54
  55        if (write) {
  56                if (size) {
  57                        if (size > 1<<29) {
  58                                /* Enforce limit to prevent overflow */
  59                                mutex_unlock(&sock_flow_mutex);
  60                                return -EINVAL;
  61                        }
  62                        size = roundup_pow_of_two(size);
  63                        if (size != orig_size) {
  64                                sock_table =
  65                                    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
  66                                if (!sock_table) {
  67                                        mutex_unlock(&sock_flow_mutex);
  68                                        return -ENOMEM;
  69                                }
  70                                rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
  71                                sock_table->mask = size - 1;
  72                        } else
  73                                sock_table = orig_sock_table;
  74
  75                        for (i = 0; i < size; i++)
  76                                sock_table->ents[i] = RPS_NO_CPU;
  77                } else
  78                        sock_table = NULL;
  79
  80                if (sock_table != orig_sock_table) {
  81                        rcu_assign_pointer(rps_sock_flow_table, sock_table);
  82                        if (sock_table)
  83                                static_key_slow_inc(&rps_needed);
  84                        if (orig_sock_table) {
  85                                static_key_slow_dec(&rps_needed);
  86                                synchronize_rcu();
  87                                vfree(orig_sock_table);
  88                        }
  89                }
  90        }
  91
  92        mutex_unlock(&sock_flow_mutex);
  93
  94        return ret;
  95}
  96#endif /* CONFIG_RPS */
  97
  98#ifdef CONFIG_NET_FLOW_LIMIT
  99static DEFINE_MUTEX(flow_limit_update_mutex);
 100
 101static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
 102                                 void __user *buffer, size_t *lenp,
 103                                 loff_t *ppos)
 104{
 105        struct sd_flow_limit *cur;
 106        struct softnet_data *sd;
 107        cpumask_var_t mask;
 108        int i, len, ret = 0;
 109
 110        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
 111                return -ENOMEM;
 112
 113        if (write) {
 114                ret = cpumask_parse_user(buffer, *lenp, mask);
 115                if (ret)
 116                        goto done;
 117
 118                mutex_lock(&flow_limit_update_mutex);
 119                len = sizeof(*cur) + netdev_flow_limit_table_len;
 120                for_each_possible_cpu(i) {
 121                        sd = &per_cpu(softnet_data, i);
 122                        cur = rcu_dereference_protected(sd->flow_limit,
 123                                     lockdep_is_held(&flow_limit_update_mutex));
 124                        if (cur && !cpumask_test_cpu(i, mask)) {
 125                                RCU_INIT_POINTER(sd->flow_limit, NULL);
 126                                synchronize_rcu();
 127                                kfree(cur);
 128                        } else if (!cur && cpumask_test_cpu(i, mask)) {
 129                                cur = kzalloc_node(len, GFP_KERNEL,
 130                                                   cpu_to_node(i));
 131                                if (!cur) {
 132                                        /* not unwinding previous changes */
 133                                        ret = -ENOMEM;
 134                                        goto write_unlock;
 135                                }
 136                                cur->num_buckets = netdev_flow_limit_table_len;
 137                                rcu_assign_pointer(sd->flow_limit, cur);
 138                        }
 139                }
 140write_unlock:
 141                mutex_unlock(&flow_limit_update_mutex);
 142        } else {
 143                char kbuf[128];
 144
 145                if (*ppos || !*lenp) {
 146                        *lenp = 0;
 147                        goto done;
 148                }
 149
 150                cpumask_clear(mask);
 151                rcu_read_lock();
 152                for_each_possible_cpu(i) {
 153                        sd = &per_cpu(softnet_data, i);
 154                        if (rcu_dereference(sd->flow_limit))
 155                                cpumask_set_cpu(i, mask);
 156                }
 157                rcu_read_unlock();
 158
 159                len = min(sizeof(kbuf) - 1, *lenp);
 160                len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
 161                if (!len) {
 162                        *lenp = 0;
 163                        goto done;
 164                }
 165                if (len < *lenp)
 166                        kbuf[len++] = '\n';
 167                if (copy_to_user(buffer, kbuf, len)) {
 168                        ret = -EFAULT;
 169                        goto done;
 170                }
 171                *lenp = len;
 172                *ppos += len;
 173        }
 174
 175done:
 176        free_cpumask_var(mask);
 177        return ret;
 178}
 179
 180static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
 181                                       void __user *buffer, size_t *lenp,
 182                                       loff_t *ppos)
 183{
 184        unsigned int old, *ptr;
 185        int ret;
 186
 187        mutex_lock(&flow_limit_update_mutex);
 188
 189        ptr = table->data;
 190        old = *ptr;
 191        ret = proc_dointvec(table, write, buffer, lenp, ppos);
 192        if (!ret && write && !is_power_of_2(*ptr)) {
 193                *ptr = old;
 194                ret = -EINVAL;
 195        }
 196
 197        mutex_unlock(&flow_limit_update_mutex);
 198        return ret;
 199}
 200#endif /* CONFIG_NET_FLOW_LIMIT */
 201
 202#ifdef CONFIG_NET_SCHED
 203static int set_default_qdisc(struct ctl_table *table, int write,
 204                             void __user *buffer, size_t *lenp, loff_t *ppos)
 205{
 206        char id[IFNAMSIZ];
 207        struct ctl_table tbl = {
 208                .data = id,
 209                .maxlen = IFNAMSIZ,
 210        };
 211        int ret;
 212
 213        qdisc_get_default(id, IFNAMSIZ);
 214
 215        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
 216        if (write && ret == 0)
 217                ret = qdisc_set_default(id);
 218        return ret;
 219}
 220#endif
 221
 222static int proc_do_rss_key(struct ctl_table *table, int write,
 223                           void __user *buffer, size_t *lenp, loff_t *ppos)
 224{
 225        struct ctl_table fake_table;
 226        char buf[NETDEV_RSS_KEY_LEN * 3];
 227
 228        snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
 229        fake_table.data = buf;
 230        fake_table.maxlen = sizeof(buf);
 231        return proc_dostring(&fake_table, write, buffer, lenp, ppos);
 232}
 233
 234static struct ctl_table net_core_table[] = {
 235#ifdef CONFIG_NET
 236        {
 237                .procname       = "wmem_max",
 238                .data           = &sysctl_wmem_max,
 239                .maxlen         = sizeof(int),
 240                .mode           = 0644,
 241                .proc_handler   = proc_dointvec_minmax,
 242                .extra1         = &min_sndbuf,
 243        },
 244        {
 245                .procname       = "rmem_max",
 246                .data           = &sysctl_rmem_max,
 247                .maxlen         = sizeof(int),
 248                .mode           = 0644,
 249                .proc_handler   = proc_dointvec_minmax,
 250                .extra1         = &min_rcvbuf,
 251        },
 252        {
 253                .procname       = "wmem_default",
 254                .data           = &sysctl_wmem_default,
 255                .maxlen         = sizeof(int),
 256                .mode           = 0644,
 257                .proc_handler   = proc_dointvec_minmax,
 258                .extra1         = &min_sndbuf,
 259        },
 260        {
 261                .procname       = "rmem_default",
 262                .data           = &sysctl_rmem_default,
 263                .maxlen         = sizeof(int),
 264                .mode           = 0644,
 265                .proc_handler   = proc_dointvec_minmax,
 266                .extra1         = &min_rcvbuf,
 267        },
 268        {
 269                .procname       = "dev_weight",
 270                .data           = &weight_p,
 271                .maxlen         = sizeof(int),
 272                .mode           = 0644,
 273                .proc_handler   = proc_dointvec
 274        },
 275        {
 276                .procname       = "netdev_max_backlog",
 277                .data           = &netdev_max_backlog,
 278                .maxlen         = sizeof(int),
 279                .mode           = 0644,
 280                .proc_handler   = proc_dointvec
 281        },
 282        {
 283                .procname       = "netdev_rss_key",
 284                .data           = &netdev_rss_key,
 285                .maxlen         = sizeof(int),
 286                .mode           = 0444,
 287                .proc_handler   = proc_do_rss_key,
 288        },
 289#ifdef CONFIG_BPF_JIT
 290        {
 291                .procname       = "bpf_jit_enable",
 292                .data           = &bpf_jit_enable,
 293                .maxlen         = sizeof(int),
 294                .mode           = 0644,
 295                .proc_handler   = proc_dointvec
 296        },
 297#endif
 298        {
 299                .procname       = "netdev_tstamp_prequeue",
 300                .data           = &netdev_tstamp_prequeue,
 301                .maxlen         = sizeof(int),
 302                .mode           = 0644,
 303                .proc_handler   = proc_dointvec
 304        },
 305        {
 306                .procname       = "message_cost",
 307                .data           = &net_ratelimit_state.interval,
 308                .maxlen         = sizeof(int),
 309                .mode           = 0644,
 310                .proc_handler   = proc_dointvec_jiffies,
 311        },
 312        {
 313                .procname       = "message_burst",
 314                .data           = &net_ratelimit_state.burst,
 315                .maxlen         = sizeof(int),
 316                .mode           = 0644,
 317                .proc_handler   = proc_dointvec,
 318        },
 319        {
 320                .procname       = "optmem_max",
 321                .data           = &sysctl_optmem_max,
 322                .maxlen         = sizeof(int),
 323                .mode           = 0644,
 324                .proc_handler   = proc_dointvec
 325        },
 326        {
 327                .procname       = "tstamp_allow_data",
 328                .data           = &sysctl_tstamp_allow_data,
 329                .maxlen         = sizeof(int),
 330                .mode           = 0644,
 331                .proc_handler   = proc_dointvec_minmax,
 332                .extra1         = &zero,
 333                .extra2         = &one
 334        },
 335#ifdef CONFIG_RPS
 336        {
 337                .procname       = "rps_sock_flow_entries",
 338                .maxlen         = sizeof(int),
 339                .mode           = 0644,
 340                .proc_handler   = rps_sock_flow_sysctl
 341        },
 342#endif
 343#ifdef CONFIG_NET_FLOW_LIMIT
 344        {
 345                .procname       = "flow_limit_cpu_bitmap",
 346                .mode           = 0644,
 347                .proc_handler   = flow_limit_cpu_sysctl
 348        },
 349        {
 350                .procname       = "flow_limit_table_len",
 351                .data           = &netdev_flow_limit_table_len,
 352                .maxlen         = sizeof(int),
 353                .mode           = 0644,
 354                .proc_handler   = flow_limit_table_len_sysctl
 355        },
 356#endif /* CONFIG_NET_FLOW_LIMIT */
 357#ifdef CONFIG_NET_RX_BUSY_POLL
 358        {
 359                .procname       = "busy_poll",
 360                .data           = &sysctl_net_busy_poll,
 361                .maxlen         = sizeof(unsigned int),
 362                .mode           = 0644,
 363                .proc_handler   = proc_dointvec
 364        },
 365        {
 366                .procname       = "busy_read",
 367                .data           = &sysctl_net_busy_read,
 368                .maxlen         = sizeof(unsigned int),
 369                .mode           = 0644,
 370                .proc_handler   = proc_dointvec
 371        },
 372#endif
 373#ifdef CONFIG_NET_SCHED
 374        {
 375                .procname       = "default_qdisc",
 376                .mode           = 0644,
 377                .maxlen         = IFNAMSIZ,
 378                .proc_handler   = set_default_qdisc
 379        },
 380#endif
 381#endif /* CONFIG_NET */
 382        {
 383                .procname       = "netdev_budget",
 384                .data           = &netdev_budget,
 385                .maxlen         = sizeof(int),
 386                .mode           = 0644,
 387                .proc_handler   = proc_dointvec
 388        },
 389        {
 390                .procname       = "warnings",
 391                .data           = &net_msg_warn,
 392                .maxlen         = sizeof(int),
 393                .mode           = 0644,
 394                .proc_handler   = proc_dointvec
 395        },
 396        {
 397                .procname       = "max_skb_frags",
 398                .data           = &sysctl_max_skb_frags,
 399                .maxlen         = sizeof(int),
 400                .mode           = 0644,
 401                .proc_handler   = proc_dointvec_minmax,
 402                .extra1         = &one,
 403                .extra2         = &max_skb_frags,
 404        },
 405        { }
 406};
 407
 408static struct ctl_table netns_core_table[] = {
 409        {
 410                .procname       = "somaxconn",
 411                .data           = &init_net.core.sysctl_somaxconn,
 412                .maxlen         = sizeof(int),
 413                .mode           = 0644,
 414                .extra1         = &zero,
 415                .proc_handler   = proc_dointvec_minmax
 416        },
 417        { }
 418};
 419
 420static __net_init int sysctl_core_net_init(struct net *net)
 421{
 422        struct ctl_table *tbl;
 423
 424        net->core.sysctl_somaxconn = SOMAXCONN;
 425
 426        tbl = netns_core_table;
 427        if (!net_eq(net, &init_net)) {
 428                tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
 429                if (tbl == NULL)
 430                        goto err_dup;
 431
 432                tbl[0].data = &net->core.sysctl_somaxconn;
 433
 434                /* Don't export any sysctls to unprivileged users */
 435                if (net->user_ns != &init_user_ns) {
 436                        tbl[0].procname = NULL;
 437                }
 438        }
 439
 440        net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
 441        if (net->core.sysctl_hdr == NULL)
 442                goto err_reg;
 443
 444        return 0;
 445
 446err_reg:
 447        if (tbl != netns_core_table)
 448                kfree(tbl);
 449err_dup:
 450        return -ENOMEM;
 451}
 452
 453static __net_exit void sysctl_core_net_exit(struct net *net)
 454{
 455        struct ctl_table *tbl;
 456
 457        tbl = net->core.sysctl_hdr->ctl_table_arg;
 458        unregister_net_sysctl_table(net->core.sysctl_hdr);
 459        BUG_ON(tbl == netns_core_table);
 460        kfree(tbl);
 461}
 462
 463static __net_initdata struct pernet_operations sysctl_core_ops = {
 464        .init = sysctl_core_net_init,
 465        .exit = sysctl_core_net_exit,
 466};
 467
 468static __init int sysctl_core_init(void)
 469{
 470        register_net_sysctl(&init_net, "net/core", net_core_table);
 471        return register_pernet_subsys(&sysctl_core_ops);
 472}
 473
 474fs_initcall(sysctl_core_init);
 475