linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/string.h>
  16#include <linux/errno.h>
  17#include <linux/if_arp.h>
  18#include <linux/netdevice.h>
  19#include <linux/init.h>
  20#include <linux/skbuff.h>
  21#include <linux/moduleparam.h>
  22#include <net/dst.h>
  23#include <net/neighbour.h>
  24#include <net/pkt_sched.h>
  25
  26/*
  27   How to setup it.
  28   ----------------
  29
  30   After loading this module you will find a new device teqlN
  31   and new qdisc with the same name. To join a slave to the equalizer
  32   you should just set this qdisc on a device f.e.
  33
  34   # tc qdisc add dev eth0 root teql0
  35   # tc qdisc add dev eth1 root teql0
  36
  37   That's all. Full PnP 8)
  38
  39   Applicability.
  40   --------------
  41
  42   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  43      signal and generate EOI events. If you want to equalize virtual devices
  44      like tunnels, use a normal eql device.
  45   2. This device puts no limitations on physical slave characteristics
  46      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  47      Certainly, large difference in link speeds will make the resulting
  48      eqalized link unusable, because of huge packet reordering.
  49      I estimate an upper useful difference as ~10 times.
  50   3. If the slave requires address resolution, only protocols using
  51      neighbour cache (IPv4/IPv6) will work over the equalized link.
  52      Other protocols are still allowed to use the slave device directly,
  53      which will not break load balancing, though native slave
  54      traffic will have the highest priority.  */
  55
  56struct teql_master {
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61        unsigned long   tx_bytes;
  62        unsigned long   tx_packets;
  63        unsigned long   tx_errors;
  64        unsigned long   tx_dropped;
  65};
  66
  67struct teql_sched_data {
  68        struct Qdisc *next;
  69        struct teql_master *m;
  70        struct sk_buff_head q;
  71};
  72
  73#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
  74
  75#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  76
  77/* "teql*" qdisc routines */
  78
  79static int
  80teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  81{
  82        struct net_device *dev = qdisc_dev(sch);
  83        struct teql_sched_data *q = qdisc_priv(sch);
  84
  85        if (q->q.qlen < dev->tx_queue_len) {
  86                __skb_queue_tail(&q->q, skb);
  87                return NET_XMIT_SUCCESS;
  88        }
  89
  90        return qdisc_drop(skb, sch);
  91}
  92
  93static struct sk_buff *
  94teql_dequeue(struct Qdisc *sch)
  95{
  96        struct teql_sched_data *dat = qdisc_priv(sch);
  97        struct netdev_queue *dat_queue;
  98        struct sk_buff *skb;
  99
 100        skb = __skb_dequeue(&dat->q);
 101        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 102        if (skb == NULL) {
 103                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 104                if (m) {
 105                        dat->m->slaves = sch;
 106                        netif_wake_queue(m);
 107                }
 108        } else {
 109                qdisc_bstats_update(sch, skb);
 110        }
 111        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 112        return skb;
 113}
 114
 115static struct sk_buff *
 116teql_peek(struct Qdisc *sch)
 117{
 118        /* teql is meant to be used as root qdisc */
 119        return NULL;
 120}
 121
 122static inline void
 123teql_neigh_release(struct neighbour *n)
 124{
 125        if (n)
 126                neigh_release(n);
 127}
 128
 129static void
 130teql_reset(struct Qdisc *sch)
 131{
 132        struct teql_sched_data *dat = qdisc_priv(sch);
 133
 134        skb_queue_purge(&dat->q);
 135        sch->q.qlen = 0;
 136}
 137
 138static void
 139teql_destroy(struct Qdisc *sch)
 140{
 141        struct Qdisc *q, *prev;
 142        struct teql_sched_data *dat = qdisc_priv(sch);
 143        struct teql_master *master = dat->m;
 144
 145        prev = master->slaves;
 146        if (prev) {
 147                do {
 148                        q = NEXT_SLAVE(prev);
 149                        if (q == sch) {
 150                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 151                                if (q == master->slaves) {
 152                                        master->slaves = NEXT_SLAVE(q);
 153                                        if (q == master->slaves) {
 154                                                struct netdev_queue *txq;
 155                                                spinlock_t *root_lock;
 156
 157                                                txq = netdev_get_tx_queue(master->dev, 0);
 158                                                master->slaves = NULL;
 159
 160                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 161                                                spin_lock_bh(root_lock);
 162                                                qdisc_reset(txq->qdisc);
 163                                                spin_unlock_bh(root_lock);
 164                                        }
 165                                }
 166                                skb_queue_purge(&dat->q);
 167                                break;
 168                        }
 169
 170                } while ((prev = q) != master->slaves);
 171        }
 172}
 173
 174static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 175{
 176        struct net_device *dev = qdisc_dev(sch);
 177        struct teql_master *m = (struct teql_master *)sch->ops;
 178        struct teql_sched_data *q = qdisc_priv(sch);
 179
 180        if (dev->hard_header_len > m->dev->hard_header_len)
 181                return -EINVAL;
 182
 183        if (m->dev == dev)
 184                return -ELOOP;
 185
 186        q->m = m;
 187
 188        skb_queue_head_init(&q->q);
 189
 190        if (m->slaves) {
 191                if (m->dev->flags & IFF_UP) {
 192                        if ((m->dev->flags & IFF_POINTOPOINT &&
 193                             !(dev->flags & IFF_POINTOPOINT)) ||
 194                            (m->dev->flags & IFF_BROADCAST &&
 195                             !(dev->flags & IFF_BROADCAST)) ||
 196                            (m->dev->flags & IFF_MULTICAST &&
 197                             !(dev->flags & IFF_MULTICAST)) ||
 198                            dev->mtu < m->dev->mtu)
 199                                return -EINVAL;
 200                } else {
 201                        if (!(dev->flags&IFF_POINTOPOINT))
 202                                m->dev->flags &= ~IFF_POINTOPOINT;
 203                        if (!(dev->flags&IFF_BROADCAST))
 204                                m->dev->flags &= ~IFF_BROADCAST;
 205                        if (!(dev->flags&IFF_MULTICAST))
 206                                m->dev->flags &= ~IFF_MULTICAST;
 207                        if (dev->mtu < m->dev->mtu)
 208                                m->dev->mtu = dev->mtu;
 209                }
 210                q->next = NEXT_SLAVE(m->slaves);
 211                NEXT_SLAVE(m->slaves) = sch;
 212        } else {
 213                q->next = sch;
 214                m->slaves = sch;
 215                m->dev->mtu = dev->mtu;
 216                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 217        }
 218        return 0;
 219}
 220
 221
 222static int
 223__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 224               struct net_device *dev, struct netdev_queue *txq,
 225               struct dst_entry *dst)
 226{
 227        struct neighbour *n;
 228        int err = 0;
 229
 230        n = dst_neigh_lookup_skb(dst, skb);
 231        if (!n)
 232                return -ENOENT;
 233
 234        if (dst->dev != dev) {
 235                struct neighbour *mn;
 236
 237                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
 238                neigh_release(n);
 239                if (IS_ERR(mn))
 240                        return PTR_ERR(mn);
 241                n = mn;
 242        }
 243
 244        if (neigh_event_send(n, skb_res) == 0) {
 245                int err;
 246                char haddr[MAX_ADDR_LEN];
 247
 248                neigh_ha_snapshot(haddr, n, dev);
 249                err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
 250                                      NULL, skb->len);
 251
 252                if (err < 0)
 253                        err = -EINVAL;
 254        } else {
 255                err = (skb_res == NULL) ? -EAGAIN : 1;
 256        }
 257        neigh_release(n);
 258        return err;
 259}
 260
 261static inline int teql_resolve(struct sk_buff *skb,
 262                               struct sk_buff *skb_res,
 263                               struct net_device *dev,
 264                               struct netdev_queue *txq)
 265{
 266        struct dst_entry *dst = skb_dst(skb);
 267        int res;
 268
 269        if (txq->qdisc == &noop_qdisc)
 270                return -ENODEV;
 271
 272        if (!dev->header_ops || !dst)
 273                return 0;
 274
 275        rcu_read_lock();
 276        res = __teql_resolve(skb, skb_res, dev, txq, dst);
 277        rcu_read_unlock();
 278
 279        return res;
 280}
 281
 282static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 283{
 284        struct teql_master *master = netdev_priv(dev);
 285        struct Qdisc *start, *q;
 286        int busy;
 287        int nores;
 288        int subq = skb_get_queue_mapping(skb);
 289        struct sk_buff *skb_res = NULL;
 290
 291        start = master->slaves;
 292
 293restart:
 294        nores = 0;
 295        busy = 0;
 296
 297        q = start;
 298        if (!q)
 299                goto drop;
 300
 301        do {
 302                struct net_device *slave = qdisc_dev(q);
 303                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 304                const struct net_device_ops *slave_ops = slave->netdev_ops;
 305
 306                if (slave_txq->qdisc_sleeping != q)
 307                        continue;
 308                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 309                    !netif_running(slave)) {
 310                        busy = 1;
 311                        continue;
 312                }
 313
 314                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
 315                case 0:
 316                        if (__netif_tx_trylock(slave_txq)) {
 317                                unsigned int length = qdisc_pkt_len(skb);
 318
 319                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 320                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 321                                        txq_trans_update(slave_txq);
 322                                        __netif_tx_unlock(slave_txq);
 323                                        master->slaves = NEXT_SLAVE(q);
 324                                        netif_wake_queue(dev);
 325                                        master->tx_packets++;
 326                                        master->tx_bytes += length;
 327                                        return NETDEV_TX_OK;
 328                                }
 329                                __netif_tx_unlock(slave_txq);
 330                        }
 331                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 332                                busy = 1;
 333                        break;
 334                case 1:
 335                        master->slaves = NEXT_SLAVE(q);
 336                        return NETDEV_TX_OK;
 337                default:
 338                        nores = 1;
 339                        break;
 340                }
 341                __skb_pull(skb, skb_network_offset(skb));
 342        } while ((q = NEXT_SLAVE(q)) != start);
 343
 344        if (nores && skb_res == NULL) {
 345                skb_res = skb;
 346                goto restart;
 347        }
 348
 349        if (busy) {
 350                netif_stop_queue(dev);
 351                return NETDEV_TX_BUSY;
 352        }
 353        master->tx_errors++;
 354
 355drop:
 356        master->tx_dropped++;
 357        dev_kfree_skb(skb);
 358        return NETDEV_TX_OK;
 359}
 360
 361static int teql_master_open(struct net_device *dev)
 362{
 363        struct Qdisc *q;
 364        struct teql_master *m = netdev_priv(dev);
 365        int mtu = 0xFFFE;
 366        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 367
 368        if (m->slaves == NULL)
 369                return -EUNATCH;
 370
 371        flags = FMASK;
 372
 373        q = m->slaves;
 374        do {
 375                struct net_device *slave = qdisc_dev(q);
 376
 377                if (slave == NULL)
 378                        return -EUNATCH;
 379
 380                if (slave->mtu < mtu)
 381                        mtu = slave->mtu;
 382                if (slave->hard_header_len > LL_MAX_HEADER)
 383                        return -EINVAL;
 384
 385                /* If all the slaves are BROADCAST, master is BROADCAST
 386                   If all the slaves are PtP, master is PtP
 387                   Otherwise, master is NBMA.
 388                 */
 389                if (!(slave->flags&IFF_POINTOPOINT))
 390                        flags &= ~IFF_POINTOPOINT;
 391                if (!(slave->flags&IFF_BROADCAST))
 392                        flags &= ~IFF_BROADCAST;
 393                if (!(slave->flags&IFF_MULTICAST))
 394                        flags &= ~IFF_MULTICAST;
 395        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 396
 397        m->dev->mtu = mtu;
 398        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 399        netif_start_queue(m->dev);
 400        return 0;
 401}
 402
 403static int teql_master_close(struct net_device *dev)
 404{
 405        netif_stop_queue(dev);
 406        return 0;
 407}
 408
 409static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
 410                                                     struct rtnl_link_stats64 *stats)
 411{
 412        struct teql_master *m = netdev_priv(dev);
 413
 414        stats->tx_packets       = m->tx_packets;
 415        stats->tx_bytes         = m->tx_bytes;
 416        stats->tx_errors        = m->tx_errors;
 417        stats->tx_dropped       = m->tx_dropped;
 418        return stats;
 419}
 420
 421static int teql_master_mtu(struct net_device *dev, int new_mtu)
 422{
 423        struct teql_master *m = netdev_priv(dev);
 424        struct Qdisc *q;
 425
 426        if (new_mtu < 68)
 427                return -EINVAL;
 428
 429        q = m->slaves;
 430        if (q) {
 431                do {
 432                        if (new_mtu > qdisc_dev(q)->mtu)
 433                                return -EINVAL;
 434                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 435        }
 436
 437        dev->mtu = new_mtu;
 438        return 0;
 439}
 440
 441static const struct net_device_ops teql_netdev_ops = {
 442        .ndo_open       = teql_master_open,
 443        .ndo_stop       = teql_master_close,
 444        .ndo_start_xmit = teql_master_xmit,
 445        .ndo_get_stats64 = teql_master_stats64,
 446        .ndo_change_mtu = teql_master_mtu,
 447};
 448
 449static __init void teql_master_setup(struct net_device *dev)
 450{
 451        struct teql_master *master = netdev_priv(dev);
 452        struct Qdisc_ops *ops = &master->qops;
 453
 454        master->dev     = dev;
 455        ops->priv_size  = sizeof(struct teql_sched_data);
 456
 457        ops->enqueue    =       teql_enqueue;
 458        ops->dequeue    =       teql_dequeue;
 459        ops->peek       =       teql_peek;
 460        ops->init       =       teql_qdisc_init;
 461        ops->reset      =       teql_reset;
 462        ops->destroy    =       teql_destroy;
 463        ops->owner      =       THIS_MODULE;
 464
 465        dev->netdev_ops =       &teql_netdev_ops;
 466        dev->type               = ARPHRD_VOID;
 467        dev->mtu                = 1500;
 468        dev->tx_queue_len       = 100;
 469        dev->flags              = IFF_NOARP;
 470        dev->hard_header_len    = LL_MAX_HEADER;
 471        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 472}
 473
 474static LIST_HEAD(master_dev_list);
 475static int max_equalizers = 1;
 476module_param(max_equalizers, int, 0);
 477MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 478
 479static int __init teql_init(void)
 480{
 481        int i;
 482        int err = -ENODEV;
 483
 484        for (i = 0; i < max_equalizers; i++) {
 485                struct net_device *dev;
 486                struct teql_master *master;
 487
 488                dev = alloc_netdev(sizeof(struct teql_master),
 489                                  "teql%d", teql_master_setup);
 490                if (!dev) {
 491                        err = -ENOMEM;
 492                        break;
 493                }
 494
 495                if ((err = register_netdev(dev))) {
 496                        free_netdev(dev);
 497                        break;
 498                }
 499
 500                master = netdev_priv(dev);
 501
 502                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 503                err = register_qdisc(&master->qops);
 504
 505                if (err) {
 506                        unregister_netdev(dev);
 507                        free_netdev(dev);
 508                        break;
 509                }
 510
 511                list_add_tail(&master->master_list, &master_dev_list);
 512        }
 513        return i ? 0 : err;
 514}
 515
 516static void __exit teql_exit(void)
 517{
 518        struct teql_master *master, *nxt;
 519
 520        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 521
 522                list_del(&master->master_list);
 523
 524                unregister_qdisc(&master->qops);
 525                unregister_netdev(master->dev);
 526                free_netdev(master->dev);
 527        }
 528}
 529
 530module_init(teql_init);
 531module_exit(teql_exit);
 532
 533MODULE_LICENSE("GPL");
 534