linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/string.h>
  16#include <linux/errno.h>
  17#include <linux/if_arp.h>
  18#include <linux/netdevice.h>
  19#include <linux/init.h>
  20#include <linux/skbuff.h>
  21#include <linux/moduleparam.h>
  22#include <net/dst.h>
  23#include <net/neighbour.h>
  24#include <net/pkt_sched.h>
  25
  26/*
  27   How to setup it.
  28   ----------------
  29
  30   After loading this module you will find a new device teqlN
  31   and new qdisc with the same name. To join a slave to the equalizer
  32   you should just set this qdisc on a device f.e.
  33
  34   # tc qdisc add dev eth0 root teql0
  35   # tc qdisc add dev eth1 root teql0
  36
  37   That's all. Full PnP 8)
  38
  39   Applicability.
  40   --------------
  41
  42   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  43      signal and generate EOI events. If you want to equalize virtual devices
  44      like tunnels, use a normal eql device.
  45   2. This device puts no limitations on physical slave characteristics
  46      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  47      Certainly, large difference in link speeds will make the resulting
  48      eqalized link unusable, because of huge packet reordering.
  49      I estimate an upper useful difference as ~10 times.
  50   3. If the slave requires address resolution, only protocols using
  51      neighbour cache (IPv4/IPv6) will work over the equalized link.
  52      Other protocols are still allowed to use the slave device directly,
  53      which will not break load balancing, though native slave
  54      traffic will have the highest priority.  */
  55
  56struct teql_master {
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61        unsigned long   tx_bytes;
  62        unsigned long   tx_packets;
  63        unsigned long   tx_errors;
  64        unsigned long   tx_dropped;
  65};
  66
  67struct teql_sched_data {
  68        struct Qdisc *next;
  69        struct teql_master *m;
  70        struct sk_buff_head q;
  71};
  72
  73#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
  74
  75#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  76
  77/* "teql*" qdisc routines */
  78
  79static int
  80teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
  81{
  82        struct net_device *dev = qdisc_dev(sch);
  83        struct teql_sched_data *q = qdisc_priv(sch);
  84
  85        if (q->q.qlen < dev->tx_queue_len) {
  86                __skb_queue_tail(&q->q, skb);
  87                return NET_XMIT_SUCCESS;
  88        }
  89
  90        return qdisc_drop(skb, sch, to_free);
  91}
  92
  93static struct sk_buff *
  94teql_dequeue(struct Qdisc *sch)
  95{
  96        struct teql_sched_data *dat = qdisc_priv(sch);
  97        struct netdev_queue *dat_queue;
  98        struct sk_buff *skb;
  99        struct Qdisc *q;
 100
 101        skb = __skb_dequeue(&dat->q);
 102        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 103        q = rcu_dereference_bh(dat_queue->qdisc);
 104
 105        if (skb == NULL) {
 106                struct net_device *m = qdisc_dev(q);
 107                if (m) {
 108                        dat->m->slaves = sch;
 109                        netif_wake_queue(m);
 110                }
 111        } else {
 112                qdisc_bstats_update(sch, skb);
 113        }
 114        sch->q.qlen = dat->q.qlen + q->q.qlen;
 115        return skb;
 116}
 117
 118static struct sk_buff *
 119teql_peek(struct Qdisc *sch)
 120{
 121        /* teql is meant to be used as root qdisc */
 122        return NULL;
 123}
 124
 125static void
 126teql_reset(struct Qdisc *sch)
 127{
 128        struct teql_sched_data *dat = qdisc_priv(sch);
 129
 130        skb_queue_purge(&dat->q);
 131        sch->q.qlen = 0;
 132}
 133
 134static void
 135teql_destroy(struct Qdisc *sch)
 136{
 137        struct Qdisc *q, *prev;
 138        struct teql_sched_data *dat = qdisc_priv(sch);
 139        struct teql_master *master = dat->m;
 140
 141        prev = master->slaves;
 142        if (prev) {
 143                do {
 144                        q = NEXT_SLAVE(prev);
 145                        if (q == sch) {
 146                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 147                                if (q == master->slaves) {
 148                                        master->slaves = NEXT_SLAVE(q);
 149                                        if (q == master->slaves) {
 150                                                struct netdev_queue *txq;
 151                                                spinlock_t *root_lock;
 152
 153                                                txq = netdev_get_tx_queue(master->dev, 0);
 154                                                master->slaves = NULL;
 155
 156                                                root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
 157                                                spin_lock_bh(root_lock);
 158                                                qdisc_reset(rtnl_dereference(txq->qdisc));
 159                                                spin_unlock_bh(root_lock);
 160                                        }
 161                                }
 162                                skb_queue_purge(&dat->q);
 163                                break;
 164                        }
 165
 166                } while ((prev = q) != master->slaves);
 167        }
 168}
 169
 170static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 171                           struct netlink_ext_ack *extack)
 172{
 173        struct net_device *dev = qdisc_dev(sch);
 174        struct teql_master *m = (struct teql_master *)sch->ops;
 175        struct teql_sched_data *q = qdisc_priv(sch);
 176
 177        if (dev->hard_header_len > m->dev->hard_header_len)
 178                return -EINVAL;
 179
 180        if (m->dev == dev)
 181                return -ELOOP;
 182
 183        q->m = m;
 184
 185        skb_queue_head_init(&q->q);
 186
 187        if (m->slaves) {
 188                if (m->dev->flags & IFF_UP) {
 189                        if ((m->dev->flags & IFF_POINTOPOINT &&
 190                             !(dev->flags & IFF_POINTOPOINT)) ||
 191                            (m->dev->flags & IFF_BROADCAST &&
 192                             !(dev->flags & IFF_BROADCAST)) ||
 193                            (m->dev->flags & IFF_MULTICAST &&
 194                             !(dev->flags & IFF_MULTICAST)) ||
 195                            dev->mtu < m->dev->mtu)
 196                                return -EINVAL;
 197                } else {
 198                        if (!(dev->flags&IFF_POINTOPOINT))
 199                                m->dev->flags &= ~IFF_POINTOPOINT;
 200                        if (!(dev->flags&IFF_BROADCAST))
 201                                m->dev->flags &= ~IFF_BROADCAST;
 202                        if (!(dev->flags&IFF_MULTICAST))
 203                                m->dev->flags &= ~IFF_MULTICAST;
 204                        if (dev->mtu < m->dev->mtu)
 205                                m->dev->mtu = dev->mtu;
 206                }
 207                q->next = NEXT_SLAVE(m->slaves);
 208                NEXT_SLAVE(m->slaves) = sch;
 209        } else {
 210                q->next = sch;
 211                m->slaves = sch;
 212                m->dev->mtu = dev->mtu;
 213                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 214        }
 215        return 0;
 216}
 217
 218
 219static int
 220__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 221               struct net_device *dev, struct netdev_queue *txq,
 222               struct dst_entry *dst)
 223{
 224        struct neighbour *n;
 225        int err = 0;
 226
 227        n = dst_neigh_lookup_skb(dst, skb);
 228        if (!n)
 229                return -ENOENT;
 230
 231        if (dst->dev != dev) {
 232                struct neighbour *mn;
 233
 234                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
 235                neigh_release(n);
 236                if (IS_ERR(mn))
 237                        return PTR_ERR(mn);
 238                n = mn;
 239        }
 240
 241        if (neigh_event_send(n, skb_res) == 0) {
 242                int err;
 243                char haddr[MAX_ADDR_LEN];
 244
 245                neigh_ha_snapshot(haddr, n, dev);
 246                err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
 247                                      haddr, NULL, skb->len);
 248
 249                if (err < 0)
 250                        err = -EINVAL;
 251        } else {
 252                err = (skb_res == NULL) ? -EAGAIN : 1;
 253        }
 254        neigh_release(n);
 255        return err;
 256}
 257
 258static inline int teql_resolve(struct sk_buff *skb,
 259                               struct sk_buff *skb_res,
 260                               struct net_device *dev,
 261                               struct netdev_queue *txq)
 262{
 263        struct dst_entry *dst = skb_dst(skb);
 264        int res;
 265
 266        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
 267                return -ENODEV;
 268
 269        if (!dev->header_ops || !dst)
 270                return 0;
 271
 272        rcu_read_lock();
 273        res = __teql_resolve(skb, skb_res, dev, txq, dst);
 274        rcu_read_unlock();
 275
 276        return res;
 277}
 278
 279static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 280{
 281        struct teql_master *master = netdev_priv(dev);
 282        struct Qdisc *start, *q;
 283        int busy;
 284        int nores;
 285        int subq = skb_get_queue_mapping(skb);
 286        struct sk_buff *skb_res = NULL;
 287
 288        start = master->slaves;
 289
 290restart:
 291        nores = 0;
 292        busy = 0;
 293
 294        q = start;
 295        if (!q)
 296                goto drop;
 297
 298        do {
 299                struct net_device *slave = qdisc_dev(q);
 300                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 301
 302                if (slave_txq->qdisc_sleeping != q)
 303                        continue;
 304                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 305                    !netif_running(slave)) {
 306                        busy = 1;
 307                        continue;
 308                }
 309
 310                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
 311                case 0:
 312                        if (__netif_tx_trylock(slave_txq)) {
 313                                unsigned int length = qdisc_pkt_len(skb);
 314
 315                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 316                                    netdev_start_xmit(skb, slave, slave_txq, false) ==
 317                                    NETDEV_TX_OK) {
 318                                        __netif_tx_unlock(slave_txq);
 319                                        master->slaves = NEXT_SLAVE(q);
 320                                        netif_wake_queue(dev);
 321                                        master->tx_packets++;
 322                                        master->tx_bytes += length;
 323                                        return NETDEV_TX_OK;
 324                                }
 325                                __netif_tx_unlock(slave_txq);
 326                        }
 327                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 328                                busy = 1;
 329                        break;
 330                case 1:
 331                        master->slaves = NEXT_SLAVE(q);
 332                        return NETDEV_TX_OK;
 333                default:
 334                        nores = 1;
 335                        break;
 336                }
 337                __skb_pull(skb, skb_network_offset(skb));
 338        } while ((q = NEXT_SLAVE(q)) != start);
 339
 340        if (nores && skb_res == NULL) {
 341                skb_res = skb;
 342                goto restart;
 343        }
 344
 345        if (busy) {
 346                netif_stop_queue(dev);
 347                return NETDEV_TX_BUSY;
 348        }
 349        master->tx_errors++;
 350
 351drop:
 352        master->tx_dropped++;
 353        dev_kfree_skb(skb);
 354        return NETDEV_TX_OK;
 355}
 356
 357static int teql_master_open(struct net_device *dev)
 358{
 359        struct Qdisc *q;
 360        struct teql_master *m = netdev_priv(dev);
 361        int mtu = 0xFFFE;
 362        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 363
 364        if (m->slaves == NULL)
 365                return -EUNATCH;
 366
 367        flags = FMASK;
 368
 369        q = m->slaves;
 370        do {
 371                struct net_device *slave = qdisc_dev(q);
 372
 373                if (slave == NULL)
 374                        return -EUNATCH;
 375
 376                if (slave->mtu < mtu)
 377                        mtu = slave->mtu;
 378                if (slave->hard_header_len > LL_MAX_HEADER)
 379                        return -EINVAL;
 380
 381                /* If all the slaves are BROADCAST, master is BROADCAST
 382                   If all the slaves are PtP, master is PtP
 383                   Otherwise, master is NBMA.
 384                 */
 385                if (!(slave->flags&IFF_POINTOPOINT))
 386                        flags &= ~IFF_POINTOPOINT;
 387                if (!(slave->flags&IFF_BROADCAST))
 388                        flags &= ~IFF_BROADCAST;
 389                if (!(slave->flags&IFF_MULTICAST))
 390                        flags &= ~IFF_MULTICAST;
 391        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 392
 393        m->dev->mtu = mtu;
 394        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 395        netif_start_queue(m->dev);
 396        return 0;
 397}
 398
 399static int teql_master_close(struct net_device *dev)
 400{
 401        netif_stop_queue(dev);
 402        return 0;
 403}
 404
 405static void teql_master_stats64(struct net_device *dev,
 406                                struct rtnl_link_stats64 *stats)
 407{
 408        struct teql_master *m = netdev_priv(dev);
 409
 410        stats->tx_packets       = m->tx_packets;
 411        stats->tx_bytes         = m->tx_bytes;
 412        stats->tx_errors        = m->tx_errors;
 413        stats->tx_dropped       = m->tx_dropped;
 414}
 415
 416static int teql_master_mtu(struct net_device *dev, int new_mtu)
 417{
 418        struct teql_master *m = netdev_priv(dev);
 419        struct Qdisc *q;
 420
 421        q = m->slaves;
 422        if (q) {
 423                do {
 424                        if (new_mtu > qdisc_dev(q)->mtu)
 425                                return -EINVAL;
 426                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 427        }
 428
 429        dev->mtu = new_mtu;
 430        return 0;
 431}
 432
 433static const struct net_device_ops teql_netdev_ops = {
 434        .ndo_open       = teql_master_open,
 435        .ndo_stop       = teql_master_close,
 436        .ndo_start_xmit = teql_master_xmit,
 437        .ndo_get_stats64 = teql_master_stats64,
 438        .ndo_change_mtu = teql_master_mtu,
 439};
 440
 441static __init void teql_master_setup(struct net_device *dev)
 442{
 443        struct teql_master *master = netdev_priv(dev);
 444        struct Qdisc_ops *ops = &master->qops;
 445
 446        master->dev     = dev;
 447        ops->priv_size  = sizeof(struct teql_sched_data);
 448
 449        ops->enqueue    =       teql_enqueue;
 450        ops->dequeue    =       teql_dequeue;
 451        ops->peek       =       teql_peek;
 452        ops->init       =       teql_qdisc_init;
 453        ops->reset      =       teql_reset;
 454        ops->destroy    =       teql_destroy;
 455        ops->owner      =       THIS_MODULE;
 456
 457        dev->netdev_ops =       &teql_netdev_ops;
 458        dev->type               = ARPHRD_VOID;
 459        dev->mtu                = 1500;
 460        dev->min_mtu            = 68;
 461        dev->max_mtu            = 65535;
 462        dev->tx_queue_len       = 100;
 463        dev->flags              = IFF_NOARP;
 464        dev->hard_header_len    = LL_MAX_HEADER;
 465        netif_keep_dst(dev);
 466}
 467
 468static LIST_HEAD(master_dev_list);
 469static int max_equalizers = 1;
 470module_param(max_equalizers, int, 0);
 471MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 472
 473static int __init teql_init(void)
 474{
 475        int i;
 476        int err = -ENODEV;
 477
 478        for (i = 0; i < max_equalizers; i++) {
 479                struct net_device *dev;
 480                struct teql_master *master;
 481
 482                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
 483                                   NET_NAME_UNKNOWN, teql_master_setup);
 484                if (!dev) {
 485                        err = -ENOMEM;
 486                        break;
 487                }
 488
 489                if ((err = register_netdev(dev))) {
 490                        free_netdev(dev);
 491                        break;
 492                }
 493
 494                master = netdev_priv(dev);
 495
 496                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 497                err = register_qdisc(&master->qops);
 498
 499                if (err) {
 500                        unregister_netdev(dev);
 501                        free_netdev(dev);
 502                        break;
 503                }
 504
 505                list_add_tail(&master->master_list, &master_dev_list);
 506        }
 507        return i ? 0 : err;
 508}
 509
 510static void __exit teql_exit(void)
 511{
 512        struct teql_master *master, *nxt;
 513
 514        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 515
 516                list_del(&master->master_list);
 517
 518                unregister_qdisc(&master->qops);
 519                unregister_netdev(master->dev);
 520                free_netdev(master->dev);
 521        }
 522}
 523
 524module_init(teql_init);
 525module_exit(teql_exit);
 526
 527MODULE_LICENSE("GPL");
 528