linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/string.h>
  15#include <linux/errno.h>
  16#include <linux/if_arp.h>
  17#include <linux/netdevice.h>
  18#include <linux/init.h>
  19#include <linux/skbuff.h>
  20#include <linux/moduleparam.h>
  21#include <net/dst.h>
  22#include <net/neighbour.h>
  23#include <net/pkt_sched.h>
  24
  25/*
  26   How to set it up.
  27   -----------------
  28
  29   After loading this module you will find a new device teqlN
  30   and new qdisc with the same name. To join a slave to the equalizer
  31   you should just set this qdisc on a device f.e.
  32
  33   # tc qdisc add dev eth0 root teql0
  34   # tc qdisc add dev eth1 root teql0
  35
  36   That's all. Full PnP 8)
  37
  38   Applicability.
  39   --------------
  40
  41   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  42      signal and generate EOI events. If you want to equalize virtual devices
  43      like tunnels, use a normal eql device.
  44   2. This device puts no limitations on physical slave characteristics
  45      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  46      Certainly, large difference in link speeds will make the resulting
  47      equalized link unusable, because of huge packet reordering.
  48      I estimate an upper useful difference as ~10 times.
  49   3. If the slave requires address resolution, only protocols using
  50      neighbour cache (IPv4/IPv6) will work over the equalized link.
  51      Other protocols are still allowed to use the slave device directly,
  52      which will not break load balancing, though native slave
  53      traffic will have the highest priority.  */
  54
  55struct teql_master
  56{
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61};
  62
  63struct teql_sched_data
  64{
  65        struct Qdisc *next;
  66        struct teql_master *m;
  67        struct neighbour *ncache;
  68        struct sk_buff_head q;
  69};
  70
  71#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
  72
  73#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
  74
  75/* "teql*" qdisc routines */
  76
  77static int
  78teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  79{
  80        struct net_device *dev = qdisc_dev(sch);
  81        struct teql_sched_data *q = qdisc_priv(sch);
  82
  83        if (q->q.qlen < dev->tx_queue_len) {
  84                __skb_queue_tail(&q->q, skb);
  85                sch->bstats.bytes += qdisc_pkt_len(skb);
  86                sch->bstats.packets++;
  87                return 0;
  88        }
  89
  90        kfree_skb(skb);
  91        sch->qstats.drops++;
  92        return NET_XMIT_DROP;
  93}
  94
  95static struct sk_buff *
  96teql_dequeue(struct Qdisc* sch)
  97{
  98        struct teql_sched_data *dat = qdisc_priv(sch);
  99        struct netdev_queue *dat_queue;
 100        struct sk_buff *skb;
 101
 102        skb = __skb_dequeue(&dat->q);
 103        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 104        if (skb == NULL) {
 105                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 106                if (m) {
 107                        dat->m->slaves = sch;
 108                        netif_wake_queue(m);
 109                }
 110        }
 111        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 112        return skb;
 113}
 114
 115static struct sk_buff *
 116teql_peek(struct Qdisc* sch)
 117{
 118        /* teql is meant to be used as root qdisc */
 119        return NULL;
 120}
 121
 122static __inline__ void
 123teql_neigh_release(struct neighbour *n)
 124{
 125        if (n)
 126                neigh_release(n);
 127}
 128
 129static void
 130teql_reset(struct Qdisc* sch)
 131{
 132        struct teql_sched_data *dat = qdisc_priv(sch);
 133
 134        skb_queue_purge(&dat->q);
 135        sch->q.qlen = 0;
 136        teql_neigh_release(xchg(&dat->ncache, NULL));
 137}
 138
 139static void
 140teql_destroy(struct Qdisc* sch)
 141{
 142        struct Qdisc *q, *prev;
 143        struct teql_sched_data *dat = qdisc_priv(sch);
 144        struct teql_master *master = dat->m;
 145
 146        if ((prev = master->slaves) != NULL) {
 147                do {
 148                        q = NEXT_SLAVE(prev);
 149                        if (q == sch) {
 150                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 151                                if (q == master->slaves) {
 152                                        master->slaves = NEXT_SLAVE(q);
 153                                        if (q == master->slaves) {
 154                                                struct netdev_queue *txq;
 155                                                spinlock_t *root_lock;
 156
 157                                                txq = netdev_get_tx_queue(master->dev, 0);
 158                                                master->slaves = NULL;
 159
 160                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 161                                                spin_lock_bh(root_lock);
 162                                                qdisc_reset(txq->qdisc);
 163                                                spin_unlock_bh(root_lock);
 164                                        }
 165                                }
 166                                skb_queue_purge(&dat->q);
 167                                teql_neigh_release(xchg(&dat->ncache, NULL));
 168                                break;
 169                        }
 170
 171                } while ((prev = q) != master->slaves);
 172        }
 173}
 174
 175static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 176{
 177        struct net_device *dev = qdisc_dev(sch);
 178        struct teql_master *m = (struct teql_master*)sch->ops;
 179        struct teql_sched_data *q = qdisc_priv(sch);
 180
 181        if (dev->hard_header_len > m->dev->hard_header_len)
 182                return -EINVAL;
 183
 184        if (m->dev == dev)
 185                return -ELOOP;
 186
 187        q->m = m;
 188
 189        skb_queue_head_init(&q->q);
 190
 191        if (m->slaves) {
 192                if (m->dev->flags & IFF_UP) {
 193                        if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
 194                            || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
 195                            || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
 196                            || dev->mtu < m->dev->mtu)
 197                                return -EINVAL;
 198                } else {
 199                        if (!(dev->flags&IFF_POINTOPOINT))
 200                                m->dev->flags &= ~IFF_POINTOPOINT;
 201                        if (!(dev->flags&IFF_BROADCAST))
 202                                m->dev->flags &= ~IFF_BROADCAST;
 203                        if (!(dev->flags&IFF_MULTICAST))
 204                                m->dev->flags &= ~IFF_MULTICAST;
 205                        if (dev->mtu < m->dev->mtu)
 206                                m->dev->mtu = dev->mtu;
 207                }
 208                q->next = NEXT_SLAVE(m->slaves);
 209                NEXT_SLAVE(m->slaves) = sch;
 210        } else {
 211                q->next = sch;
 212                m->slaves = sch;
 213                m->dev->mtu = dev->mtu;
 214                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 215        }
 216        return 0;
 217}
 218
 219
 220static int
 221__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
 222{
 223        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
 224        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
 225        struct neighbour *mn = skb_dst(skb)->neighbour;
 226        struct neighbour *n = q->ncache;
 227
 228        if (mn->tbl == NULL)
 229                return -EINVAL;
 230        if (n && n->tbl == mn->tbl &&
 231            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
 232                atomic_inc(&n->refcnt);
 233        } else {
 234                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
 235                if (IS_ERR(n))
 236                        return PTR_ERR(n);
 237        }
 238        if (neigh_event_send(n, skb_res) == 0) {
 239                int err;
 240
 241                read_lock(&n->lock);
 242                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
 243                                      n->ha, NULL, skb->len);
 244                read_unlock(&n->lock);
 245
 246                if (err < 0) {
 247                        neigh_release(n);
 248                        return -EINVAL;
 249                }
 250                teql_neigh_release(xchg(&q->ncache, n));
 251                return 0;
 252        }
 253        neigh_release(n);
 254        return (skb_res == NULL) ? -EAGAIN : 1;
 255}
 256
 257static inline int teql_resolve(struct sk_buff *skb,
 258                               struct sk_buff *skb_res, struct net_device *dev)
 259{
 260        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 261        if (txq->qdisc == &noop_qdisc)
 262                return -ENODEV;
 263
 264        if (dev->header_ops == NULL ||
 265            skb_dst(skb) == NULL ||
 266            skb_dst(skb)->neighbour == NULL)
 267                return 0;
 268        return __teql_resolve(skb, skb_res, dev);
 269}
 270
 271static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 272{
 273        struct teql_master *master = netdev_priv(dev);
 274        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 275        struct Qdisc *start, *q;
 276        int busy;
 277        int nores;
 278        int subq = skb_get_queue_mapping(skb);
 279        struct sk_buff *skb_res = NULL;
 280
 281        start = master->slaves;
 282
 283restart:
 284        nores = 0;
 285        busy = 0;
 286
 287        if ((q = start) == NULL)
 288                goto drop;
 289
 290        do {
 291                struct net_device *slave = qdisc_dev(q);
 292                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 293                const struct net_device_ops *slave_ops = slave->netdev_ops;
 294
 295                if (slave_txq->qdisc_sleeping != q)
 296                        continue;
 297                if (__netif_subqueue_stopped(slave, subq) ||
 298                    !netif_running(slave)) {
 299                        busy = 1;
 300                        continue;
 301                }
 302
 303                switch (teql_resolve(skb, skb_res, slave)) {
 304                case 0:
 305                        if (__netif_tx_trylock(slave_txq)) {
 306                                unsigned int length = qdisc_pkt_len(skb);
 307
 308                                if (!netif_tx_queue_stopped(slave_txq) &&
 309                                    !netif_tx_queue_frozen(slave_txq) &&
 310                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 311                                        txq_trans_update(slave_txq);
 312                                        __netif_tx_unlock(slave_txq);
 313                                        master->slaves = NEXT_SLAVE(q);
 314                                        netif_wake_queue(dev);
 315                                        txq->tx_packets++;
 316                                        txq->tx_bytes += length;
 317                                        return NETDEV_TX_OK;
 318                                }
 319                                __netif_tx_unlock(slave_txq);
 320                        }
 321                        if (netif_queue_stopped(dev))
 322                                busy = 1;
 323                        break;
 324                case 1:
 325                        master->slaves = NEXT_SLAVE(q);
 326                        return NETDEV_TX_OK;
 327                default:
 328                        nores = 1;
 329                        break;
 330                }
 331                __skb_pull(skb, skb_network_offset(skb));
 332        } while ((q = NEXT_SLAVE(q)) != start);
 333
 334        if (nores && skb_res == NULL) {
 335                skb_res = skb;
 336                goto restart;
 337        }
 338
 339        if (busy) {
 340                netif_stop_queue(dev);
 341                return NETDEV_TX_BUSY;
 342        }
 343        dev->stats.tx_errors++;
 344
 345drop:
 346        txq->tx_dropped++;
 347        dev_kfree_skb(skb);
 348        return NETDEV_TX_OK;
 349}
 350
 351static int teql_master_open(struct net_device *dev)
 352{
 353        struct Qdisc * q;
 354        struct teql_master *m = netdev_priv(dev);
 355        int mtu = 0xFFFE;
 356        unsigned flags = IFF_NOARP|IFF_MULTICAST;
 357
 358        if (m->slaves == NULL)
 359                return -EUNATCH;
 360
 361        flags = FMASK;
 362
 363        q = m->slaves;
 364        do {
 365                struct net_device *slave = qdisc_dev(q);
 366
 367                if (slave == NULL)
 368                        return -EUNATCH;
 369
 370                if (slave->mtu < mtu)
 371                        mtu = slave->mtu;
 372                if (slave->hard_header_len > LL_MAX_HEADER)
 373                        return -EINVAL;
 374
 375                /* If all the slaves are BROADCAST, master is BROADCAST
 376                   If all the slaves are PtP, master is PtP
 377                   Otherwise, master is NBMA.
 378                 */
 379                if (!(slave->flags&IFF_POINTOPOINT))
 380                        flags &= ~IFF_POINTOPOINT;
 381                if (!(slave->flags&IFF_BROADCAST))
 382                        flags &= ~IFF_BROADCAST;
 383                if (!(slave->flags&IFF_MULTICAST))
 384                        flags &= ~IFF_MULTICAST;
 385        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 386
 387        m->dev->mtu = mtu;
 388        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 389        netif_start_queue(m->dev);
 390        return 0;
 391}
 392
 393static int teql_master_close(struct net_device *dev)
 394{
 395        netif_stop_queue(dev);
 396        return 0;
 397}
 398
 399static int teql_master_mtu(struct net_device *dev, int new_mtu)
 400{
 401        struct teql_master *m = netdev_priv(dev);
 402        struct Qdisc *q;
 403
 404        if (new_mtu < 68)
 405                return -EINVAL;
 406
 407        q = m->slaves;
 408        if (q) {
 409                do {
 410                        if (new_mtu > qdisc_dev(q)->mtu)
 411                                return -EINVAL;
 412                } while ((q=NEXT_SLAVE(q)) != m->slaves);
 413        }
 414
 415        dev->mtu = new_mtu;
 416        return 0;
 417}
 418
 419static const struct net_device_ops teql_netdev_ops = {
 420        .ndo_open       = teql_master_open,
 421        .ndo_stop       = teql_master_close,
 422        .ndo_start_xmit = teql_master_xmit,
 423        .ndo_change_mtu = teql_master_mtu,
 424};
 425
 426static __init void teql_master_setup(struct net_device *dev)
 427{
 428        struct teql_master *master = netdev_priv(dev);
 429        struct Qdisc_ops *ops = &master->qops;
 430
 431        master->dev     = dev;
 432        ops->priv_size  = sizeof(struct teql_sched_data);
 433
 434        ops->enqueue    =       teql_enqueue;
 435        ops->dequeue    =       teql_dequeue;
 436        ops->peek       =       teql_peek;
 437        ops->init       =       teql_qdisc_init;
 438        ops->reset      =       teql_reset;
 439        ops->destroy    =       teql_destroy;
 440        ops->owner      =       THIS_MODULE;
 441
 442        dev->netdev_ops =       &teql_netdev_ops;
 443        dev->type               = ARPHRD_VOID;
 444        dev->mtu                = 1500;
 445        dev->tx_queue_len       = 100;
 446        dev->flags              = IFF_NOARP;
 447        dev->hard_header_len    = LL_MAX_HEADER;
 448}
 449
 450static LIST_HEAD(master_dev_list);
 451static int max_equalizers = 1;
 452module_param(max_equalizers, int, 0);
 453MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 454
 455static int __init teql_init(void)
 456{
 457        int i;
 458        int err = -ENODEV;
 459
 460        for (i = 0; i < max_equalizers; i++) {
 461                struct net_device *dev;
 462                struct teql_master *master;
 463
 464                dev = alloc_netdev(sizeof(struct teql_master),
 465                                  "teql%d", teql_master_setup);
 466                if (!dev) {
 467                        err = -ENOMEM;
 468                        break;
 469                }
 470
 471                if ((err = register_netdev(dev))) {
 472                        free_netdev(dev);
 473                        break;
 474                }
 475
 476                master = netdev_priv(dev);
 477
 478                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 479                err = register_qdisc(&master->qops);
 480
 481                if (err) {
 482                        unregister_netdev(dev);
 483                        free_netdev(dev);
 484                        break;
 485                }
 486
 487                list_add_tail(&master->master_list, &master_dev_list);
 488        }
 489        return i ? 0 : err;
 490}
 491
 492static void __exit teql_exit(void)
 493{
 494        struct teql_master *master, *nxt;
 495
 496        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 497
 498                list_del(&master->master_list);
 499
 500                unregister_qdisc(&master->qops);
 501                unregister_netdev(master->dev);
 502                free_netdev(master->dev);
 503        }
 504}
 505
 506module_init(teql_init);
 507module_exit(teql_exit);
 508
 509MODULE_LICENSE("GPL");
 510