linux/net/sched/sch_teql.c
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN and a new
   qdisc with the same name. To join a slave to the equalizer, just
   attach this qdisc to the slave device as its root, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)  (A fuller example session follows these notes.)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful ratio as ~10 times.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
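
/*
   Example session (purely illustrative -- the interface names and the
   address below are placeholders, not something this module defines):

   # modprobe sch_teql max_equalizers=1
   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0
   # ip link set dev teql0 up
   # ip addr add 10.0.0.1/24 dev teql0

   Teardown is symmetric: delete the root qdiscs and bring teql0 down.
   Note that teql_master_open() refuses to bring the master up
   (-EUNATCH) until at least one slave qdisc has been attached.
 */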

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
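
/*
 * The slaves of one master form a circular, singly linked list through
 * the ->next pointer in their private data; NEXT_SLAVE() follows that
 * link, and master->slaves points at the slave to try first, so
 * transmission walks the ring round-robin.  FMASK collects the
 * link-type flags that master and slaves have to agree on.
 */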

/* "teql*" qdisc routines */

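/*
 * Enqueue onto the slave's private queue, bounded by the slave
 * device's own tx_queue_len; anything beyond that is dropped via
 * qdisc_drop(), which also updates the qdisc's drop statistics.
 */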
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch);
}

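/*
 * When the slave's queue runs dry, park the round-robin pointer on
 * this slave and wake the master device, so the master's xmit routine
 * is rescheduled as soon as there is something to send again.  The
 * reported qlen also accounts for packets still sitting in the
 * master's own qdisc.
 */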
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);

		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

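/*
 * A slave may only join a *running* master if it is at least as capable:
 * no weaker link-type flags and an MTU no smaller than the master's.
 * While the master is down the check relaxes, and the master instead
 * inherits the intersection of its slaves' capabilities.
 */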
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

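/*
 * The most recently resolved neighbour for each slave is cached in
 * q->ncache (swapped in with xchg() and dropped via
 * teql_neigh_release()), so back-to-back packets to one destination
 * skip the lookup.  Returns 0 once the hard header has been built,
 * a negative errno on hard failure, and 1 when resolution is pending
 * and the skb has been queued on the neighbour entry.
 */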
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct neighbour *mn)
{
	struct teql_sched_data *q = qdisc_priv(txq->qdisc);
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
				      NULL, skb->len);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	struct neighbour *mn;
	int res;

	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	mn = dst_get_neighbour_noref(dst);
	res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
	rcu_read_unlock();

	return res;
}

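/*
 * Walk the slave ring starting at master->slaves.  The first slave that
 * is running, not flow-controlled and resolvable gets the packet, and
 * the ring head advances past it (round-robin).  If every usable slave
 * was merely busy, stop the master queue and return NETDEV_TX_BUSY so
 * the packet is requeued; if resolution failed everywhere, retry once
 * with skb_res set so the skb can be parked on a neighbour entry
 * instead of being dropped.
 */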
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
					txq_trans_update(slave_txq);
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
	return stats;
}

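/*
 * 68 bytes is the minimum MTU that IPv4 requires (RFC 791); beyond
 * that, the master must never be configured with an MTU larger than
 * that of any slave.
 */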
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");