/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc as root on the device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

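/* Per-master state, stored as netdev_priv() of the teqlN device.  Each
 * master embeds its own Qdisc_ops, registered under the device name, so
 * the qdisc kind "teql0" is permanently tied to the device teql0.  The
 * slave qdiscs form a circular singly linked list rooted at @slaves;
 * @master_list links the master into this module's global list.
 */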
struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

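/* Private data of one slave teql qdisc: its successor in the master's
 * circular slave list, a backpointer to the master, and a local queue
 * holding packets until the slave device can take them.
 */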
struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

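/* NEXT_SLAVE() steps along the circular slave list; FMASK covers the
 * link-type flags the master mirrors from the intersection of its slaves.
 */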
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

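/* Enqueue on a slave: tail-drop once the local queue reaches the slave
 * device's tx_queue_len.  The actual transmit decision is made later by
 * the master in teql_master_xmit().
 */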
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch);
}

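/* Dequeue on a slave: hand the slave device the next queued packet.  When
 * the local queue runs dry, make this slave the head of the round robin
 * and wake the master (if its root qdisc is active) so it can feed us
 * again.  sch->q.qlen also accounts for packets still sitting in the
 * master device's own qdisc.
 */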
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
}

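/* Unlink this qdisc from the master's circular slave list.  If it was the
 * last slave, clear the list and reset the qdisc on the master device
 * under its root lock, since the master can no longer transmit anything.
 */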
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

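/* Called when "tc qdisc add dev <slave> root teqlN" grafts this qdisc.
 * A slave must not need more link-layer headroom than the master, and the
 * master cannot enslave itself.  While the master is up, a new slave must
 * be at least as capable (flags, MTU) as the master; while it is down,
 * the master's flags and MTU are narrowed to what the slave supports.
 */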
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

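/* Resolve the link-layer header for @skb on slave @dev via the neighbour
 * cache.  Returns 0 when the header was built, 1 when the neighbour layer
 * consumed the packet (second pass, @skb_res set), -EAGAIN when resolution
 * was merely kicked off (first pass), or another negative errno on error.
 */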
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
				      haddr, NULL, skb->len);

		if (err < 0)
			err = -EINVAL;
		else
			err = 0;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

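/* Transmit on the master: walk the slave list round-robin, starting at the
 * last used position, and push the packet straight out of the first slave
 * whose queue is free and whose neighbour is resolved.  If some slave could
 * not resolve, retry once with skb_res set so the neighbour layer may queue
 * the packet; if all slaves were busy, stop the master queue and ask the
 * core to requeue (NETDEV_TX_BUSY).
 */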
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

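/* Bringing the master up: require at least one usable slave, adopt the
 * smallest slave MTU, and intersect the slaves' link-type flags.
 */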
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
	return stats;
}

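/* The master's MTU may be lowered freely (above the IPv4 minimum of 68)
 * but never raised beyond the MTU of any current slave.
 */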
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

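/* Initialize one master device and the Qdisc_ops embedded in its private
 * data; the ops' id is filled in with the device name at registration
 * time, which is what makes "teqlN" usable as a qdisc kind.
 */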
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

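/* Create max_equalizers master devices named teql0, teql1, ... and
 * register a matching qdisc kind for each.  Module load succeeds if at
 * least one equalizer was set up.
 */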
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");