linux/net/core/netprio_cgroup.c
<<
>>
Prefs
   1/*
   2 * net/core/netprio_cgroup.c    Priority Control Group
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Neil Horman <nhorman@tuxdriver.com>
  10 */
  11
  12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13
  14#include <linux/module.h>
  15#include <linux/slab.h>
  16#include <linux/types.h>
  17#include <linux/string.h>
  18#include <linux/errno.h>
  19#include <linux/skbuff.h>
  20#include <linux/cgroup.h>
  21#include <linux/rcupdate.h>
  22#include <linux/atomic.h>
  23#include <net/rtnetlink.h>
  24#include <net/pkt_cls.h>
  25#include <net/sock.h>
  26#include <net/netprio_cgroup.h>
  27
  28#include <linux/fdtable.h>
  29
  30#define PRIOMAP_MIN_SZ          128
  31
  32static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
  33{
  34        return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
  35                            struct cgroup_netprio_state, css);
  36}
  37
  38/*
  39 * Extend @dev->priomap so that it's large enough to accomodate
  40 * @target_idx.  @dev->priomap.priomap_len > @target_idx after successful
  41 * return.  Must be called under rtnl lock.
  42 */
  43static int extend_netdev_table(struct net_device *dev, u32 target_idx)
  44{
  45        struct netprio_map *old, *new;
  46        size_t new_sz, new_len;
  47
  48        /* is the existing priomap large enough? */
  49        old = rtnl_dereference(dev->priomap);
  50        if (old && old->priomap_len > target_idx)
  51                return 0;
  52
  53        /*
  54         * Determine the new size.  Let's keep it power-of-two.  We start
  55         * from PRIOMAP_MIN_SZ and double it until it's large enough to
  56         * accommodate @target_idx.
  57         */
  58        new_sz = PRIOMAP_MIN_SZ;
  59        while (true) {
  60                new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
  61                        sizeof(new->priomap[0]);
  62                if (new_len > target_idx)
  63                        break;
  64                new_sz *= 2;
  65                /* overflowed? */
  66                if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
  67                        return -ENOSPC;
  68        }
  69
  70        /* allocate & copy */
  71        new = kzalloc(new_sz, GFP_KERNEL);
  72        if (!new)
  73                return -ENOMEM;
  74
  75        if (old)
  76                memcpy(new->priomap, old->priomap,
  77                       old->priomap_len * sizeof(old->priomap[0]));
  78
  79        new->priomap_len = new_len;
  80
  81        /* install the new priomap */
  82        rcu_assign_pointer(dev->priomap, new);
  83        if (old)
  84                kfree_rcu(old, rcu);
  85        return 0;
  86}
  87
  88/**
  89 * netprio_prio - return the effective netprio of a cgroup-net_device pair
  90 * @cgrp: cgroup part of the target pair
  91 * @dev: net_device part of the target pair
  92 *
  93 * Should be called under RCU read or rtnl lock.
  94 */
  95static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
  96{
  97        struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
  98
  99        if (map && cgrp->id < map->priomap_len)
 100                return map->priomap[cgrp->id];
 101        return 0;
 102}
 103
 104/**
 105 * netprio_set_prio - set netprio on a cgroup-net_device pair
 106 * @cgrp: cgroup part of the target pair
 107 * @dev: net_device part of the target pair
 108 * @prio: prio to set
 109 *
 110 * Set netprio to @prio on @cgrp-@dev pair.  Should be called under rtnl
 111 * lock and may fail under memory pressure for non-zero @prio.
 112 */
 113static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
 114                            u32 prio)
 115{
 116        struct netprio_map *map;
 117        int ret;
 118
 119        /* avoid extending priomap for zero writes */
 120        map = rtnl_dereference(dev->priomap);
 121        if (!prio && (!map || map->priomap_len <= cgrp->id))
 122                return 0;
 123
 124        ret = extend_netdev_table(dev, cgrp->id);
 125        if (ret)
 126                return ret;
 127
 128        map = rtnl_dereference(dev->priomap);
 129        map->priomap[cgrp->id] = prio;
 130        return 0;
 131}
 132
 133static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 134{
 135        struct cgroup_netprio_state *cs;
 136
 137        cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 138        if (!cs)
 139                return ERR_PTR(-ENOMEM);
 140
 141        return &cs->css;
 142}
 143
 144static int cgrp_css_online(struct cgroup *cgrp)
 145{
 146        struct cgroup *parent = cgrp->parent;
 147        struct net_device *dev;
 148        int ret = 0;
 149
 150        if (!parent)
 151                return 0;
 152
 153        rtnl_lock();
 154        /*
 155         * Inherit prios from the parent.  As all prios are set during
 156         * onlining, there is no need to clear them on offline.
 157         */
 158        for_each_netdev(&init_net, dev) {
 159                u32 prio = netprio_prio(parent, dev);
 160
 161                ret = netprio_set_prio(cgrp, dev, prio);
 162                if (ret)
 163                        break;
 164        }
 165        rtnl_unlock();
 166        return ret;
 167}
 168
 169static void cgrp_css_free(struct cgroup *cgrp)
 170{
 171        kfree(cgrp_netprio_state(cgrp));
 172}
 173
 174static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
 175{
 176        return cgrp->id;
 177}
 178
 179static int read_priomap(struct cgroup *cont, struct cftype *cft,
 180                        struct cgroup_map_cb *cb)
 181{
 182        struct net_device *dev;
 183
 184        rcu_read_lock();
 185        for_each_netdev_rcu(&init_net, dev)
 186                cb->fill(cb, dev->name, netprio_prio(cont, dev));
 187        rcu_read_unlock();
 188        return 0;
 189}
 190
 191static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
 192                         const char *buffer)
 193{
 194        char devname[IFNAMSIZ + 1];
 195        struct net_device *dev;
 196        u32 prio;
 197        int ret;
 198
 199        if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
 200                return -EINVAL;
 201
 202        dev = dev_get_by_name(&init_net, devname);
 203        if (!dev)
 204                return -ENODEV;
 205
 206        rtnl_lock();
 207
 208        ret = netprio_set_prio(cgrp, dev, prio);
 209
 210        rtnl_unlock();
 211        dev_put(dev);
 212        return ret;
 213}
 214
 215static int update_netprio(const void *v, struct file *file, unsigned n)
 216{
 217        int err;
 218        struct socket *sock = sock_from_file(file, &err);
 219        if (sock)
 220                sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
 221        return 0;
 222}
 223
 224static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 225{
 226        struct task_struct *p;
 227        void *v;
 228
 229        cgroup_taskset_for_each(p, cgrp, tset) {
 230                task_lock(p);
 231                v = (void *)(unsigned long)task_netprioidx(p);
 232                iterate_fd(p->files, 0, update_netprio, v);
 233                task_unlock(p);
 234        }
 235}
 236
 237static struct cftype ss_files[] = {
 238        {
 239                .name = "prioidx",
 240                .read_u64 = read_prioidx,
 241        },
 242        {
 243                .name = "ifpriomap",
 244                .read_map = read_priomap,
 245                .write_string = write_priomap,
 246        },
 247        { }     /* terminate */
 248};
 249
 250struct cgroup_subsys net_prio_subsys = {
 251        .name           = "net_prio",
 252        .css_alloc      = cgrp_css_alloc,
 253        .css_online     = cgrp_css_online,
 254        .css_free       = cgrp_css_free,
 255        .attach         = net_prio_attach,
 256        .subsys_id      = net_prio_subsys_id,
 257        .base_cftypes   = ss_files,
 258        .module         = THIS_MODULE,
 259};
 260
 261static int netprio_device_event(struct notifier_block *unused,
 262                                unsigned long event, void *ptr)
 263{
 264        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 265        struct netprio_map *old;
 266
 267        /*
 268         * Note this is called with rtnl_lock held so we have update side
 269         * protection on our rcu assignments
 270         */
 271
 272        switch (event) {
 273        case NETDEV_UNREGISTER:
 274                old = rtnl_dereference(dev->priomap);
 275                RCU_INIT_POINTER(dev->priomap, NULL);
 276                if (old)
 277                        kfree_rcu(old, rcu);
 278                break;
 279        }
 280        return NOTIFY_DONE;
 281}
 282
 283static struct notifier_block netprio_device_notifier = {
 284        .notifier_call = netprio_device_event
 285};
 286
 287static int __init init_cgroup_netprio(void)
 288{
 289        int ret;
 290
 291        ret = cgroup_load_subsys(&net_prio_subsys);
 292        if (ret)
 293                goto out;
 294
 295        register_netdevice_notifier(&netprio_device_notifier);
 296
 297out:
 298        return ret;
 299}
 300
 301static void __exit exit_cgroup_netprio(void)
 302{
 303        struct netprio_map *old;
 304        struct net_device *dev;
 305
 306        unregister_netdevice_notifier(&netprio_device_notifier);
 307
 308        cgroup_unload_subsys(&net_prio_subsys);
 309
 310        rtnl_lock();
 311        for_each_netdev(&init_net, dev) {
 312                old = rtnl_dereference(dev->priomap);
 313                RCU_INIT_POINTER(dev->priomap, NULL);
 314                if (old)
 315                        kfree_rcu(old, rcu);
 316        }
 317        rtnl_unlock();
 318}
 319
 320module_init(init_cgroup_netprio);
 321module_exit(exit_cgroup_netprio);
 322MODULE_LICENSE("GPL v2");
 323