linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/string.h>
  16#include <linux/errno.h>
  17#include <linux/if_arp.h>
  18#include <linux/netdevice.h>
  19#include <linux/init.h>
  20#include <linux/skbuff.h>
  21#include <linux/moduleparam.h>
  22#include <net/dst.h>
  23#include <net/neighbour.h>
  24#include <net/pkt_sched.h>
  25
  26/*
  27   How to set it up.
  28   -----------------
  29
  30   After loading this module you will find a new device teqlN
  31   and new qdisc with the same name. To join a slave to the equalizer
  32   you should just set this qdisc on a device f.e.
  33
  34   # tc qdisc add dev eth0 root teql0
  35   # tc qdisc add dev eth1 root teql0
  36
  37   That's all. Full PnP 8)
  38
  39   Applicability.
  40   --------------
  41
  42   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  43      signal and generate EOI events. If you want to equalize virtual devices
  44      like tunnels, use a normal eql device.
  45   2. This device puts no limitations on physical slave characteristics
  46      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  47      Certainly, large difference in link speeds will make the resulting
  48      equalized link unusable, because of huge packet reordering.
  49      I estimate an upper useful difference as ~10 times.
  50   3. If the slave requires address resolution, only protocols using
  51      neighbour cache (IPv4/IPv6) will work over the equalized link.
  52      Other protocols are still allowed to use the slave device directly,
  53      which will not break load balancing, though native slave
  54      traffic will have the highest priority.  */
  55
  56struct teql_master {
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61        unsigned long   tx_bytes;
  62        unsigned long   tx_packets;
  63        unsigned long   tx_errors;
  64        unsigned long   tx_dropped;
  65};
  66
  67struct teql_sched_data {
  68        struct Qdisc *next;
  69        struct teql_master *m;
  70        struct neighbour *ncache;
  71        struct sk_buff_head q;
  72};
  73
  74#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
  75
  76#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  77
  78/* "teql*" qdisc routines */
  79
  80static int
  81teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  82{
  83        struct net_device *dev = qdisc_dev(sch);
  84        struct teql_sched_data *q = qdisc_priv(sch);
  85
  86        if (q->q.qlen < dev->tx_queue_len) {
  87                __skb_queue_tail(&q->q, skb);
  88                return NET_XMIT_SUCCESS;
  89        }
  90
  91        kfree_skb(skb);
  92        sch->qstats.drops++;
  93        return NET_XMIT_DROP;
  94}
  95
  96static struct sk_buff *
  97teql_dequeue(struct Qdisc *sch)
  98{
  99        struct teql_sched_data *dat = qdisc_priv(sch);
 100        struct netdev_queue *dat_queue;
 101        struct sk_buff *skb;
 102
 103        skb = __skb_dequeue(&dat->q);
 104        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 105        if (skb == NULL) {
 106                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 107                if (m) {
 108                        dat->m->slaves = sch;
 109                        netif_wake_queue(m);
 110                }
 111        } else {
 112                qdisc_bstats_update(sch, skb);
 113        }
 114        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 115        return skb;
 116}
 117
 118static struct sk_buff *
 119teql_peek(struct Qdisc *sch)
 120{
 121        /* teql is meant to be used as root qdisc */
 122        return NULL;
 123}
 124
 125static inline void
 126teql_neigh_release(struct neighbour *n)
 127{
 128        if (n)
 129                neigh_release(n);
 130}
 131
 132static void
 133teql_reset(struct Qdisc *sch)
 134{
 135        struct teql_sched_data *dat = qdisc_priv(sch);
 136
 137        skb_queue_purge(&dat->q);
 138        sch->q.qlen = 0;
 139        teql_neigh_release(xchg(&dat->ncache, NULL));
 140}
 141
 142static void
 143teql_destroy(struct Qdisc *sch)
 144{
 145        struct Qdisc *q, *prev;
 146        struct teql_sched_data *dat = qdisc_priv(sch);
 147        struct teql_master *master = dat->m;
 148
 149        prev = master->slaves;
 150        if (prev) {
 151                do {
 152                        q = NEXT_SLAVE(prev);
 153                        if (q == sch) {
 154                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 155                                if (q == master->slaves) {
 156                                        master->slaves = NEXT_SLAVE(q);
 157                                        if (q == master->slaves) {
 158                                                struct netdev_queue *txq;
 159                                                spinlock_t *root_lock;
 160
 161                                                txq = netdev_get_tx_queue(master->dev, 0);
 162                                                master->slaves = NULL;
 163
 164                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 165                                                spin_lock_bh(root_lock);
 166                                                qdisc_reset(txq->qdisc);
 167                                                spin_unlock_bh(root_lock);
 168                                        }
 169                                }
 170                                skb_queue_purge(&dat->q);
 171                                teql_neigh_release(xchg(&dat->ncache, NULL));
 172                                break;
 173                        }
 174
 175                } while ((prev = q) != master->slaves);
 176        }
 177}
 178
 179static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 180{
 181        struct net_device *dev = qdisc_dev(sch);
 182        struct teql_master *m = (struct teql_master *)sch->ops;
 183        struct teql_sched_data *q = qdisc_priv(sch);
 184
 185        if (dev->hard_header_len > m->dev->hard_header_len)
 186                return -EINVAL;
 187
 188        if (m->dev == dev)
 189                return -ELOOP;
 190
 191        q->m = m;
 192
 193        skb_queue_head_init(&q->q);
 194
 195        if (m->slaves) {
 196                if (m->dev->flags & IFF_UP) {
 197                        if ((m->dev->flags & IFF_POINTOPOINT &&
 198                             !(dev->flags & IFF_POINTOPOINT)) ||
 199                            (m->dev->flags & IFF_BROADCAST &&
 200                             !(dev->flags & IFF_BROADCAST)) ||
 201                            (m->dev->flags & IFF_MULTICAST &&
 202                             !(dev->flags & IFF_MULTICAST)) ||
 203                            dev->mtu < m->dev->mtu)
 204                                return -EINVAL;
 205                } else {
 206                        if (!(dev->flags&IFF_POINTOPOINT))
 207                                m->dev->flags &= ~IFF_POINTOPOINT;
 208                        if (!(dev->flags&IFF_BROADCAST))
 209                                m->dev->flags &= ~IFF_BROADCAST;
 210                        if (!(dev->flags&IFF_MULTICAST))
 211                                m->dev->flags &= ~IFF_MULTICAST;
 212                        if (dev->mtu < m->dev->mtu)
 213                                m->dev->mtu = dev->mtu;
 214                }
 215                q->next = NEXT_SLAVE(m->slaves);
 216                NEXT_SLAVE(m->slaves) = sch;
 217        } else {
 218                q->next = sch;
 219                m->slaves = sch;
 220                m->dev->mtu = dev->mtu;
 221                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 222        }
 223        return 0;
 224}
 225
 226
 227static int
 228__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
 229{
 230        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
 231        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
 232        struct neighbour *mn = skb_dst(skb)->neighbour;
 233        struct neighbour *n = q->ncache;
 234
 235        if (mn->tbl == NULL)
 236                return -EINVAL;
 237        if (n && n->tbl == mn->tbl &&
 238            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
 239                atomic_inc(&n->refcnt);
 240        } else {
 241                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
 242                if (IS_ERR(n))
 243                        return PTR_ERR(n);
 244        }
 245        if (neigh_event_send(n, skb_res) == 0) {
 246                int err;
 247                char haddr[MAX_ADDR_LEN];
 248
 249                neigh_ha_snapshot(haddr, n, dev);
 250                err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
 251                                      NULL, skb->len);
 252
 253                if (err < 0) {
 254                        neigh_release(n);
 255                        return -EINVAL;
 256                }
 257                teql_neigh_release(xchg(&q->ncache, n));
 258                return 0;
 259        }
 260        neigh_release(n);
 261        return (skb_res == NULL) ? -EAGAIN : 1;
 262}
 263
 264static inline int teql_resolve(struct sk_buff *skb,
 265                               struct sk_buff *skb_res, struct net_device *dev)
 266{
 267        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 268        if (txq->qdisc == &noop_qdisc)
 269                return -ENODEV;
 270
 271        if (dev->header_ops == NULL ||
 272            skb_dst(skb) == NULL ||
 273            skb_dst(skb)->neighbour == NULL)
 274                return 0;
 275        return __teql_resolve(skb, skb_res, dev);
 276}
 277
 278static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 279{
 280        struct teql_master *master = netdev_priv(dev);
 281        struct Qdisc *start, *q;
 282        int busy;
 283        int nores;
 284        int subq = skb_get_queue_mapping(skb);
 285        struct sk_buff *skb_res = NULL;
 286
 287        start = master->slaves;
 288
 289restart:
 290        nores = 0;
 291        busy = 0;
 292
 293        q = start;
 294        if (!q)
 295                goto drop;
 296
 297        do {
 298                struct net_device *slave = qdisc_dev(q);
 299                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 300                const struct net_device_ops *slave_ops = slave->netdev_ops;
 301
 302                if (slave_txq->qdisc_sleeping != q)
 303                        continue;
 304                if (__netif_subqueue_stopped(slave, subq) ||
 305                    !netif_running(slave)) {
 306                        busy = 1;
 307                        continue;
 308                }
 309
 310                switch (teql_resolve(skb, skb_res, slave)) {
 311                case 0:
 312                        if (__netif_tx_trylock(slave_txq)) {
 313                                unsigned int length = qdisc_pkt_len(skb);
 314
 315                                if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
 316                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 317                                        txq_trans_update(slave_txq);
 318                                        __netif_tx_unlock(slave_txq);
 319                                        master->slaves = NEXT_SLAVE(q);
 320                                        netif_wake_queue(dev);
 321                                        master->tx_packets++;
 322                                        master->tx_bytes += length;
 323                                        return NETDEV_TX_OK;
 324                                }
 325                                __netif_tx_unlock(slave_txq);
 326                        }
 327                        if (netif_queue_stopped(dev))
 328                                busy = 1;
 329                        break;
 330                case 1:
 331                        master->slaves = NEXT_SLAVE(q);
 332                        return NETDEV_TX_OK;
 333                default:
 334                        nores = 1;
 335                        break;
 336                }
 337                __skb_pull(skb, skb_network_offset(skb));
 338        } while ((q = NEXT_SLAVE(q)) != start);
 339
 340        if (nores && skb_res == NULL) {
 341                skb_res = skb;
 342                goto restart;
 343        }
 344
 345        if (busy) {
 346                netif_stop_queue(dev);
 347                return NETDEV_TX_BUSY;
 348        }
 349        master->tx_errors++;
 350
 351drop:
 352        master->tx_dropped++;
 353        dev_kfree_skb(skb);
 354        return NETDEV_TX_OK;
 355}
 356
 357static int teql_master_open(struct net_device *dev)
 358{
 359        struct Qdisc *q;
 360        struct teql_master *m = netdev_priv(dev);
 361        int mtu = 0xFFFE;
 362        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 363
 364        if (m->slaves == NULL)
 365                return -EUNATCH;
 366
 367        flags = FMASK;
 368
 369        q = m->slaves;
 370        do {
 371                struct net_device *slave = qdisc_dev(q);
 372
 373                if (slave == NULL)
 374                        return -EUNATCH;
 375
 376                if (slave->mtu < mtu)
 377                        mtu = slave->mtu;
 378                if (slave->hard_header_len > LL_MAX_HEADER)
 379                        return -EINVAL;
 380
 381                /* If all the slaves are BROADCAST, master is BROADCAST
 382                   If all the slaves are PtP, master is PtP
 383                   Otherwise, master is NBMA.
 384                 */
 385                if (!(slave->flags&IFF_POINTOPOINT))
 386                        flags &= ~IFF_POINTOPOINT;
 387                if (!(slave->flags&IFF_BROADCAST))
 388                        flags &= ~IFF_BROADCAST;
 389                if (!(slave->flags&IFF_MULTICAST))
 390                        flags &= ~IFF_MULTICAST;
 391        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 392
 393        m->dev->mtu = mtu;
 394        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 395        netif_start_queue(m->dev);
 396        return 0;
 397}
 398
 399static int teql_master_close(struct net_device *dev)
 400{
 401        netif_stop_queue(dev);
 402        return 0;
 403}
 404
 405static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
 406                                                     struct rtnl_link_stats64 *stats)
 407{
 408        struct teql_master *m = netdev_priv(dev);
 409
 410        stats->tx_packets       = m->tx_packets;
 411        stats->tx_bytes         = m->tx_bytes;
 412        stats->tx_errors        = m->tx_errors;
 413        stats->tx_dropped       = m->tx_dropped;
 414        return stats;
 415}
 416
 417static int teql_master_mtu(struct net_device *dev, int new_mtu)
 418{
 419        struct teql_master *m = netdev_priv(dev);
 420        struct Qdisc *q;
 421
 422        if (new_mtu < 68)
 423                return -EINVAL;
 424
 425        q = m->slaves;
 426        if (q) {
 427                do {
 428                        if (new_mtu > qdisc_dev(q)->mtu)
 429                                return -EINVAL;
 430                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 431        }
 432
 433        dev->mtu = new_mtu;
 434        return 0;
 435}
 436
 437static const struct net_device_ops teql_netdev_ops = {
 438        .ndo_open       = teql_master_open,
 439        .ndo_stop       = teql_master_close,
 440        .ndo_start_xmit = teql_master_xmit,
 441        .ndo_get_stats64 = teql_master_stats64,
 442        .ndo_change_mtu = teql_master_mtu,
 443};
 444
 445static __init void teql_master_setup(struct net_device *dev)
 446{
 447        struct teql_master *master = netdev_priv(dev);
 448        struct Qdisc_ops *ops = &master->qops;
 449
 450        master->dev     = dev;
 451        ops->priv_size  = sizeof(struct teql_sched_data);
 452
 453        ops->enqueue    =       teql_enqueue;
 454        ops->dequeue    =       teql_dequeue;
 455        ops->peek       =       teql_peek;
 456        ops->init       =       teql_qdisc_init;
 457        ops->reset      =       teql_reset;
 458        ops->destroy    =       teql_destroy;
 459        ops->owner      =       THIS_MODULE;
 460
 461        dev->netdev_ops =       &teql_netdev_ops;
 462        dev->type               = ARPHRD_VOID;
 463        dev->mtu                = 1500;
 464        dev->tx_queue_len       = 100;
 465        dev->flags              = IFF_NOARP;
 466        dev->hard_header_len    = LL_MAX_HEADER;
 467        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 468}
 469
 470static LIST_HEAD(master_dev_list);
 471static int max_equalizers = 1;
 472module_param(max_equalizers, int, 0);
 473MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 474
 475static int __init teql_init(void)
 476{
 477        int i;
 478        int err = -ENODEV;
 479
 480        for (i = 0; i < max_equalizers; i++) {
 481                struct net_device *dev;
 482                struct teql_master *master;
 483
 484                dev = alloc_netdev(sizeof(struct teql_master),
 485                                  "teql%d", teql_master_setup);
 486                if (!dev) {
 487                        err = -ENOMEM;
 488                        break;
 489                }
 490
 491                if ((err = register_netdev(dev))) {
 492                        free_netdev(dev);
 493                        break;
 494                }
 495
 496                master = netdev_priv(dev);
 497
 498                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 499                err = register_qdisc(&master->qops);
 500
 501                if (err) {
 502                        unregister_netdev(dev);
 503                        free_netdev(dev);
 504                        break;
 505                }
 506
 507                list_add_tail(&master->master_list, &master_dev_list);
 508        }
 509        return i ? 0 : err;
 510}
 511
 512static void __exit teql_exit(void)
 513{
 514        struct teql_master *master, *nxt;
 515
 516        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 517
 518                list_del(&master->master_list);
 519
 520                unregister_qdisc(&master->qops);
 521                unregister_netdev(master->dev);
 522                free_netdev(master->dev);
 523        }
 524}
 525
 526module_init(teql_init);
 527module_exit(teql_exit);
 528
 529MODULE_LICENSE("GPL");
 530