   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/string.h>
  16#include <linux/errno.h>
  17#include <linux/if_arp.h>
  18#include <linux/netdevice.h>
  19#include <linux/init.h>
  20#include <linux/skbuff.h>
  21#include <linux/moduleparam.h>
  22#include <net/dst.h>
  23#include <net/neighbour.h>
  24#include <net/pkt_sched.h>
  25
  26/*
   How to set it up.
   ------------------
  29
  30   After loading this module you will find a new device teqlN
  31   and new qdisc with the same name. To join a slave to the equalizer
  32   you should just set this qdisc on a device f.e.
  33
  34   # tc qdisc add dev eth0 root teql0
  35   # tc qdisc add dev eth1 root teql0
  36
  37   That's all. Full PnP 8)
  38
  39   Applicability.
  40   --------------
  41
  42   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  43      signal and generate EOI events. If you want to equalize virtual devices
  44      like tunnels, use a normal eql device.
  45   2. This device puts no limitations on physical slave characteristics
  46      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  47      Certainly, large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
  49      I estimate an upper useful difference as ~10 times.
  50   3. If the slave requires address resolution, only protocols using
  51      neighbour cache (IPv4/IPv6) will work over the equalized link.
  52      Other protocols are still allowed to use the slave device directly,
  53      which will not break load balancing, though native slave
  54      traffic will have the highest priority.  */
  55
/* One "teql" master: the virtual teqlN net_device, the circular list of
 * slave qdiscs attached to it, and its software TX counters.
 *
 * qops is embedded first so that a Qdisc's ops pointer can be cast back
 * to the containing teql_master (see teql_qdisc_init()).
 */
struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;		/* the teqlN master device */
	struct Qdisc *slaves;		/* head of circular slave list, or NULL */
	struct list_head master_list;	/* link in master_dev_list */
	unsigned long	tx_bytes;	/* software stats for ndo_get_stats64 */
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};
  66
/* Per-slave qdisc private data. */
struct teql_sched_data {
	struct Qdisc *next;	/* next slave in the master's circular list */
	struct teql_master *m;	/* owning master (NULL if init() failed early) */
	struct sk_buff_head q;	/* packets queued on this slave */
};
  72
/* Follow the circular slave list: each slave qdisc's private data holds
 * the pointer to the next slave.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Flag bits mirrored from the slaves onto the master device. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  76
  77/* "teql*" qdisc routines */
  78
  79static int
  80teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  81{
  82        struct net_device *dev = qdisc_dev(sch);
  83        struct teql_sched_data *q = qdisc_priv(sch);
  84
  85        if (q->q.qlen < dev->tx_queue_len) {
  86                __skb_queue_tail(&q->q, skb);
  87                return NET_XMIT_SUCCESS;
  88        }
  89
  90        return qdisc_drop(skb, sch);
  91}
  92
/* ->dequeue: take the next packet from this slave's private queue and
 * keep the qdisc's visible qlen in sync with the master.  When the
 * slave runs dry, make it the preferred next-transmit candidate and
 * wake the master so pending master traffic gets rescheduled.
 */
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);
		if (m) {
			/* Slave drained: round-robin restarts here and the
			 * master may transmit again.
			 */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	/* Visible backlog = this slave's queue + the master qdisc's queue. */
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}
 117
/* ->peek is unsupported: always reports "nothing queued". */
static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}
 124
 125static inline void
 126teql_neigh_release(struct neighbour *n)
 127{
 128        if (n)
 129                neigh_release(n);
 130}
 131
 132static void
 133teql_reset(struct Qdisc *sch)
 134{
 135        struct teql_sched_data *dat = qdisc_priv(sch);
 136
 137        skb_queue_purge(&dat->q);
 138        sch->q.qlen = 0;
 139}
 140
 141static void
 142teql_destroy(struct Qdisc *sch)
 143{
 144        struct Qdisc *q, *prev;
 145        struct teql_sched_data *dat = qdisc_priv(sch);
 146        struct teql_master *master = dat->m;
 147
 148        prev = master->slaves;
 149        if (prev) {
 150                do {
 151                        q = NEXT_SLAVE(prev);
 152                        if (q == sch) {
 153                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 154                                if (q == master->slaves) {
 155                                        master->slaves = NEXT_SLAVE(q);
 156                                        if (q == master->slaves) {
 157                                                struct netdev_queue *txq;
 158                                                spinlock_t *root_lock;
 159
 160                                                txq = netdev_get_tx_queue(master->dev, 0);
 161                                                master->slaves = NULL;
 162
 163                                                root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
 164                                                spin_lock_bh(root_lock);
 165                                                qdisc_reset(rtnl_dereference(txq->qdisc));
 166                                                spin_unlock_bh(root_lock);
 167                                        }
 168                                }
 169                                skb_queue_purge(&dat->q);
 170                                break;
 171                        }
 172
 173                } while ((prev = q) != master->slaves);
 174        }
 175}
 176
/* ->init: attach sch as a slave of the master selected by the qdisc id
 * (sch->ops is the qops embedded in that master's teql_master).
 *
 * Returns -EINVAL if the slave cannot satisfy the master's current link
 * parameters, -ELOOP when attaching a master to itself.
 */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	/* Cast back from the embedded Qdisc_ops to the owning master. */
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is up: the new slave must not weaken the
			 * flags/MTU the master already advertises.
			 */
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: shrink its flags/MTU to the
			 * intersection with the new slave.
			 */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Insert into the circular list right after the head. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: it alone defines the master's parameters. */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}
 223
 224
 225static int
 226__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 227               struct net_device *dev, struct netdev_queue *txq,
 228               struct dst_entry *dst)
 229{
 230        struct neighbour *n;
 231        int err = 0;
 232
 233        n = dst_neigh_lookup_skb(dst, skb);
 234        if (!n)
 235                return -ENOENT;
 236
 237        if (dst->dev != dev) {
 238                struct neighbour *mn;
 239
 240                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
 241                neigh_release(n);
 242                if (IS_ERR(mn))
 243                        return PTR_ERR(mn);
 244                n = mn;
 245        }
 246
 247        if (neigh_event_send(n, skb_res) == 0) {
 248                int err;
 249                char haddr[MAX_ADDR_LEN];
 250
 251                neigh_ha_snapshot(haddr, n, dev);
 252                err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
 253                                      NULL, skb->len);
 254
 255                if (err < 0)
 256                        err = -EINVAL;
 257        } else {
 258                err = (skb_res == NULL) ? -EAGAIN : 1;
 259        }
 260        neigh_release(n);
 261        return err;
 262}
 263
 264static inline int teql_resolve(struct sk_buff *skb,
 265                               struct sk_buff *skb_res,
 266                               struct net_device *dev,
 267                               struct netdev_queue *txq)
 268{
 269        struct dst_entry *dst = skb_dst(skb);
 270        int res;
 271
 272        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
 273                return -ENODEV;
 274
 275        if (!dev->header_ops || !dst)
 276                return 0;
 277
 278        rcu_read_lock();
 279        res = __teql_resolve(skb, skb_res, dev, txq, dst);
 280        rcu_read_unlock();
 281
 282        return res;
 283}
 284
/* ndo_start_xmit for the master: walk the circular slave list and hand
 * the skb to the first slave that is running, not stopped, and whose
 * link-layer header can be resolved.
 *
 * Up to two passes are made: the first with skb_res == NULL (resolution
 * may report -EAGAIN), the second offering the skb itself to neighbour
 * resolution.  If every usable slave was busy, the master queue is
 * stopped and NETDEV_TX_BUSY returned so the stack requeues the packet.
 */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		/* Skip slaves whose teql qdisc is no longer installed. */
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					/* Round-robin: next xmit starts at
					 * the following slave.
					 */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			/* Neighbour resolution took ownership of the skb
			 * (second pass, skb_res == skb).
			 */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		/* Strip any header pushed by a failed attempt before
		 * trying the next slave.
		 */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* Second pass: offer the skb itself to resolution. */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
 362
 363static int teql_master_open(struct net_device *dev)
 364{
 365        struct Qdisc *q;
 366        struct teql_master *m = netdev_priv(dev);
 367        int mtu = 0xFFFE;
 368        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 369
 370        if (m->slaves == NULL)
 371                return -EUNATCH;
 372
 373        flags = FMASK;
 374
 375        q = m->slaves;
 376        do {
 377                struct net_device *slave = qdisc_dev(q);
 378
 379                if (slave == NULL)
 380                        return -EUNATCH;
 381
 382                if (slave->mtu < mtu)
 383                        mtu = slave->mtu;
 384                if (slave->hard_header_len > LL_MAX_HEADER)
 385                        return -EINVAL;
 386
 387                /* If all the slaves are BROADCAST, master is BROADCAST
 388                   If all the slaves are PtP, master is PtP
 389                   Otherwise, master is NBMA.
 390                 */
 391                if (!(slave->flags&IFF_POINTOPOINT))
 392                        flags &= ~IFF_POINTOPOINT;
 393                if (!(slave->flags&IFF_BROADCAST))
 394                        flags &= ~IFF_BROADCAST;
 395                if (!(slave->flags&IFF_MULTICAST))
 396                        flags &= ~IFF_MULTICAST;
 397        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 398
 399        m->dev->mtu = mtu;
 400        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 401        netif_start_queue(m->dev);
 402        return 0;
 403}
 404
/* ndo_stop for the master: stop the TX queue; slaves stay attached. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}
 410
 411static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
 412                                                     struct rtnl_link_stats64 *stats)
 413{
 414        struct teql_master *m = netdev_priv(dev);
 415
 416        stats->tx_packets       = m->tx_packets;
 417        stats->tx_bytes         = m->tx_bytes;
 418        stats->tx_errors        = m->tx_errors;
 419        stats->tx_dropped       = m->tx_dropped;
 420        return stats;
 421}
 422
 423static int teql_master_mtu(struct net_device *dev, int new_mtu)
 424{
 425        struct teql_master *m = netdev_priv(dev);
 426        struct Qdisc *q;
 427
 428        if (new_mtu < 68)
 429                return -EINVAL;
 430
 431        q = m->slaves;
 432        if (q) {
 433                do {
 434                        if (new_mtu > qdisc_dev(q)->mtu)
 435                                return -EINVAL;
 436                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 437        }
 438
 439        dev->mtu = new_mtu;
 440        return 0;
 441}
 442
/* net_device_ops shared by all teqlN master devices. */
static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};
 450
 451static __init void teql_master_setup(struct net_device *dev)
 452{
 453        struct teql_master *master = netdev_priv(dev);
 454        struct Qdisc_ops *ops = &master->qops;
 455
 456        master->dev     = dev;
 457        ops->priv_size  = sizeof(struct teql_sched_data);
 458
 459        ops->enqueue    =       teql_enqueue;
 460        ops->dequeue    =       teql_dequeue;
 461        ops->peek       =       teql_peek;
 462        ops->init       =       teql_qdisc_init;
 463        ops->reset      =       teql_reset;
 464        ops->destroy    =       teql_destroy;
 465        ops->owner      =       THIS_MODULE;
 466
 467        dev->netdev_ops =       &teql_netdev_ops;
 468        dev->type               = ARPHRD_VOID;
 469        dev->mtu                = 1500;
 470        dev->tx_queue_len       = 100;
 471        dev->flags              = IFF_NOARP;
 472        dev->hard_header_len    = LL_MAX_HEADER;
 473        netif_keep_dst(dev);
 474}
 475
/* All created masters, for teardown in teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices (and matching qdiscs) created at load time. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 480
 481static int __init teql_init(void)
 482{
 483        int i;
 484        int err = -ENODEV;
 485
 486        for (i = 0; i < max_equalizers; i++) {
 487                struct net_device *dev;
 488                struct teql_master *master;
 489
 490                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
 491                                   NET_NAME_UNKNOWN, teql_master_setup);
 492                if (!dev) {
 493                        err = -ENOMEM;
 494                        break;
 495                }
 496
 497                if ((err = register_netdev(dev))) {
 498                        free_netdev(dev);
 499                        break;
 500                }
 501
 502                master = netdev_priv(dev);
 503
 504                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 505                err = register_qdisc(&master->qops);
 506
 507                if (err) {
 508                        unregister_netdev(dev);
 509                        free_netdev(dev);
 510                        break;
 511                }
 512
 513                list_add_tail(&master->master_list, &master_dev_list);
 514        }
 515        return i ? 0 : err;
 516}
 517
 518static void __exit teql_exit(void)
 519{
 520        struct teql_master *master, *nxt;
 521
 522        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 523
 524                list_del(&master->master_list);
 525
 526                unregister_qdisc(&master->qops);
 527                unregister_netdev(master->dev);
 528                free_netdev(master->dev);
 529        }
 530}
 531
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
 536