/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you just attach this qdisc to the device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
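
/*
   A minimal usage sketch (illustrative, not from the original comment;
   the interface name and addresses are made up): after enslaving devices
   as above, the teql master itself must still be brought up and given
   an address like any other interface, e.g.

   # ip link set dev teql0 up
   # ip addr add 10.0.0.1/24 dev teql0

   The peer end has to be configured symmetrically for traffic to be
   balanced in both directions. */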

struct teql_master
{
        struct Qdisc_ops qops;
        struct net_device *dev;
        struct Qdisc *slaves;
        struct list_head master_list;
        unsigned long   tx_bytes;
        unsigned long   tx_packets;
        unsigned long   tx_errors;
        unsigned long   tx_dropped;
};

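/* Per-slave qdisc state.  ->next links all slave qdiscs of one master into
 * a circular ring (see NEXT_SLAVE below); ->ncache caches the most recently
 * resolved neighbour, and ->q holds the packets queued on this slave.
 */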
struct teql_sched_data
{
        struct Qdisc *next;
        struct teql_master *m;
        struct neighbour *ncache;
        struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

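/* Enqueue on this slave's private queue, bounded by the slave device's
 * tx_queue_len; packets beyond that limit are dropped.
 */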
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_sched_data *q = qdisc_priv(sch);

        if (q->q.qlen < dev->tx_queue_len) {
                __skb_queue_tail(&q->q, skb);
                return NET_XMIT_SUCCESS;
        }

        kfree_skb(skb);
        sch->qstats.drops++;
        return NET_XMIT_DROP;
}

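/* Dequeue from this slave's private queue.  When it runs empty, make this
 * slave the master's current one and wake the master device so that
 * teql_master_xmit() resumes the round-robin from here.
 */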
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct netdev_queue *dat_queue;
        struct sk_buff *skb;

        skb = __skb_dequeue(&dat->q);
        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
        if (skb == NULL) {
                struct net_device *m = qdisc_dev(dat_queue->qdisc);
                if (m) {
                        dat->m->slaves = sch;
                        netif_wake_queue(m);
                }
        } else {
                qdisc_bstats_update(sch, skb);
        }
        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
        return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc* sch)
{
        /* teql is meant to be used as root qdisc */
        return NULL;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
        if (n)
                neigh_release(n);
}

static void
teql_reset(struct Qdisc* sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);

        skb_queue_purge(&dat->q);
        sch->q.qlen = 0;
        teql_neigh_release(xchg(&dat->ncache, NULL));
}

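/* Unlink this qdisc from the master's circular slave ring.  If it was the
 * last slave, also reset the master's own qdisc under its root lock.
 */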
static void
teql_destroy(struct Qdisc* sch)
{
        struct Qdisc *q, *prev;
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct teql_master *master = dat->m;

        if ((prev = master->slaves) != NULL) {
                do {
                        q = NEXT_SLAVE(prev);
                        if (q == sch) {
                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                                if (q == master->slaves) {
                                        master->slaves = NEXT_SLAVE(q);
                                        if (q == master->slaves) {
                                                struct netdev_queue *txq;
                                                spinlock_t *root_lock;

                                                txq = netdev_get_tx_queue(master->dev, 0);
                                                master->slaves = NULL;

                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
                                                spin_lock_bh(root_lock);
                                                qdisc_reset(txq->qdisc);
                                                spin_unlock_bh(root_lock);
                                        }
                                }
                                skb_queue_purge(&dat->q);
                                teql_neigh_release(xchg(&dat->ncache, NULL));
                                break;
                        }

                } while ((prev = q) != master->slaves);
        }
}

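/* Attach a slave ("tc qdisc add dev ... root teqlN").  Self-enslaving and
 * slaves with oversized link-layer headers are refused.  While the master
 * is up, a new slave may not relax the master's constraints; while it is
 * down, the master's flags and MTU shrink to the slaves' intersection.
 */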
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_master *m = (struct teql_master*)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);

        if (dev->hard_header_len > m->dev->hard_header_len)
                return -EINVAL;

        if (m->dev == dev)
                return -ELOOP;

        q->m = m;

        skb_queue_head_init(&q->q);

        if (m->slaves) {
                if (m->dev->flags & IFF_UP) {
                        if ((m->dev->flags & IFF_POINTOPOINT &&
                             !(dev->flags & IFF_POINTOPOINT)) ||
                            (m->dev->flags & IFF_BROADCAST &&
                             !(dev->flags & IFF_BROADCAST)) ||
                            (m->dev->flags & IFF_MULTICAST &&
                             !(dev->flags & IFF_MULTICAST)) ||
                            dev->mtu < m->dev->mtu)
                                return -EINVAL;
                } else {
                        if (!(dev->flags&IFF_POINTOPOINT))
                                m->dev->flags &= ~IFF_POINTOPOINT;
                        if (!(dev->flags&IFF_BROADCAST))
                                m->dev->flags &= ~IFF_BROADCAST;
                        if (!(dev->flags&IFF_MULTICAST))
                                m->dev->flags &= ~IFF_MULTICAST;
                        if (dev->mtu < m->dev->mtu)
                                m->dev->mtu = dev->mtu;
                }
                q->next = NEXT_SLAVE(m->slaves);
                NEXT_SLAVE(m->slaves) = sch;
        } else {
                q->next = sch;
                m->slaves = sch;
                m->dev->mtu = dev->mtu;
                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
        }
        return 0;
}


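/* Build the link-layer header for the slave @dev, using the neighbour of
 * the master's route and a per-slave neighbour cache.  Returns 0 with the
 * header filled in, 1 if the skb was taken by the neighbour layer pending
 * resolution, or a negative errno.
 */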
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
        struct neighbour *mn = skb_dst(skb)->neighbour;
        struct neighbour *n = q->ncache;

        if (mn->tbl == NULL)
                return -EINVAL;
        if (n && n->tbl == mn->tbl &&
            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
                atomic_inc(&n->refcnt);
        } else {
                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
                if (IS_ERR(n))
                        return PTR_ERR(n);
        }
        if (neigh_event_send(n, skb_res) == 0) {
                int err;
                char haddr[MAX_ADDR_LEN];

                neigh_ha_snapshot(haddr, n, dev);
                err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
                                      NULL, skb->len);

                if (err < 0) {
                        neigh_release(n);
                        return -EINVAL;
                }
                teql_neigh_release(xchg(&q->ncache, n));
                return 0;
        }
        neigh_release(n);
        return (skb_res == NULL) ? -EAGAIN : 1;
}

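/* A slave whose qdisc was replaced by noop_qdisc is effectively gone
 * (-ENODEV).  Header building is needed only when the slave has header_ops
 * and the skb carries a destination with a neighbour; otherwise the packet
 * can be sent as-is.
 */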
static inline int teql_resolve(struct sk_buff *skb,
                               struct sk_buff *skb_res, struct net_device *dev)
{
        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
        if (txq->qdisc == &noop_qdisc)
                return -ENODEV;

        if (dev->header_ops == NULL ||
            skb_dst(skb) == NULL ||
            skb_dst(skb)->neighbour == NULL)
                return 0;
        return __teql_resolve(skb, skb_res, dev);
}

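/* Round-robin transmit over the slave ring.  Slaves that are stopped, not
 * running, or whose qdisc has been swapped out are skipped; on success
 * master->slaves advances so the next packet starts at the following slave.
 * A second pass with skb_res set lets the neighbour layer queue the packet
 * for address resolution; if every usable slave is busy, the master queue
 * is stopped and NETDEV_TX_BUSY returned.
 */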
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc *start, *q;
        int busy;
        int nores;
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        if ((q = start) == NULL)
                goto drop;

        do {
                struct net_device *slave = qdisc_dev(q);
                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
                const struct net_device_ops *slave_ops = slave->netdev_ops;

                if (slave_txq->qdisc_sleeping != q)
                        continue;
                if (__netif_subqueue_stopped(slave, subq) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave)) {
                case 0:
                        if (__netif_tx_trylock(slave_txq)) {
                                unsigned int length = qdisc_pkt_len(skb);

                                if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
                                        txq_trans_update(slave_txq);
                                        __netif_tx_unlock(slave_txq);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->tx_packets++;
                                        master->tx_bytes += length;
                                        return NETDEV_TX_OK;
                                }
                                __netif_tx_unlock(slave_txq);
                        }
                        if (netif_queue_stopped(dev))
                                busy = 1;
                        break;
                case 1:
                        master->slaves = NEXT_SLAVE(q);
                        return NETDEV_TX_OK;
                default:
                        nores = 1;
                        break;
                }
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        if (nores && skb_res == NULL) {
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;
        }
        master->tx_errors++;

drop:
        master->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

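/* The master can come up only with at least one slave attached.  Its MTU
 * becomes the minimum over the slaves, and its flags the intersection of
 * the slaves' broadcast/point-to-point/multicast capabilities.
 */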
static int teql_master_open(struct net_device *dev)
{
        struct Qdisc * q;
        struct teql_master *m = netdev_priv(dev);
        int mtu = 0xFFFE;
        unsigned flags = IFF_NOARP|IFF_MULTICAST;

        if (m->slaves == NULL)
                return -EUNATCH;

        flags = FMASK;

        q = m->slaves;
        do {
                struct net_device *slave = qdisc_dev(q);

                if (slave == NULL)
                        return -EUNATCH;

                if (slave->mtu < mtu)
                        mtu = slave->mtu;
                if (slave->hard_header_len > LL_MAX_HEADER)
                        return -EINVAL;

                /* If all the slaves are BROADCAST, master is BROADCAST
                   If all the slaves are PtP, master is PtP
                   Otherwise, master is NBMA.
                 */
                if (!(slave->flags&IFF_POINTOPOINT))
                        flags &= ~IFF_POINTOPOINT;
                if (!(slave->flags&IFF_BROADCAST))
                        flags &= ~IFF_BROADCAST;
                if (!(slave->flags&IFF_MULTICAST))
                        flags &= ~IFF_MULTICAST;
        } while ((q = NEXT_SLAVE(q)) != m->slaves);

        m->dev->mtu = mtu;
        m->dev->flags = (m->dev->flags&~FMASK) | flags;
        netif_start_queue(m->dev);
        return 0;
}

static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
                                                     struct rtnl_link_stats64 *stats)
{
        struct teql_master *m = netdev_priv(dev);

        stats->tx_packets       = m->tx_packets;
        stats->tx_bytes         = m->tx_bytes;
        stats->tx_errors        = m->tx_errors;
        stats->tx_dropped       = m->tx_dropped;
        return stats;
}

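/* The MTU may only be raised as far as the smallest slave MTU allows;
 * 68 is the minimum MTU required by IPv4.
 */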
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
        struct teql_master *m = netdev_priv(dev);
        struct Qdisc *q;

        if (new_mtu < 68)
                return -EINVAL;

        q = m->slaves;
        if (q) {
                do {
                        if (new_mtu > qdisc_dev(q)->mtu)
                                return -EINVAL;
                } while ((q=NEXT_SLAVE(q)) != m->slaves);
        }

        dev->mtu = new_mtu;
        return 0;
}

static const struct net_device_ops teql_netdev_ops = {
        .ndo_open       = teql_master_open,
        .ndo_stop       = teql_master_close,
        .ndo_start_xmit = teql_master_xmit,
        .ndo_get_stats64 = teql_master_stats64,
        .ndo_change_mtu = teql_master_mtu,
};

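/* Fill in the master net_device and its embedded Qdisc_ops.  The ops id is
 * set to the device name in teql_init(), which is what pairs the qdisc
 * "teqlN" with the device teqlN.
 */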
static __init void teql_master_setup(struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc_ops *ops = &master->qops;

        master->dev     = dev;
        ops->priv_size  = sizeof(struct teql_sched_data);

        ops->enqueue    =       teql_enqueue;
        ops->dequeue    =       teql_dequeue;
        ops->peek       =       teql_peek;
        ops->init       =       teql_qdisc_init;
        ops->reset      =       teql_reset;
        ops->destroy    =       teql_destroy;
        ops->owner      =       THIS_MODULE;

        dev->netdev_ops =       &teql_netdev_ops;
        dev->type               = ARPHRD_VOID;
        dev->mtu                = 1500;
        dev->tx_queue_len       = 100;
        dev->flags              = IFF_NOARP;
        dev->hard_header_len    = LL_MAX_HEADER;
        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

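/* For example (illustrative invocation, not from the original file):
 *
 *      # modprobe sch_teql max_equalizers=4
 *
 * creates teql0 ... teql3, each with a matching qdisc registered below.
 */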
static int __init teql_init(void)
{
        int i;
        int err = -ENODEV;

        for (i = 0; i < max_equalizers; i++) {
                struct net_device *dev;
                struct teql_master *master;

                dev = alloc_netdev(sizeof(struct teql_master),
                                   "teql%d", teql_master_setup);
                if (!dev) {
                        err = -ENOMEM;
                        break;
                }

                if ((err = register_netdev(dev))) {
                        free_netdev(dev);
                        break;
                }

                master = netdev_priv(dev);

                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
                err = register_qdisc(&master->qops);

                if (err) {
                        unregister_netdev(dev);
                        free_netdev(dev);
                        break;
                }

                list_add_tail(&master->master_list, &master_dev_list);
        }
        return i ? 0 : err;
}

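/* Undo teql_init(): for each master, drop the qdisc ops first, then
 * unregister and free the net_device.
 */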
static void __exit teql_exit(void)
{
        struct teql_master *master, *nxt;

        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

                list_del(&master->master_list);

                unregister_qdisc(&master->qops);
                unregister_netdev(master->dev);
                free_netdev(master->dev);
        }
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");