linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/string.h>
  16#include <linux/errno.h>
  17#include <linux/if_arp.h>
  18#include <linux/netdevice.h>
  19#include <linux/init.h>
  20#include <linux/skbuff.h>
  21#include <linux/moduleparam.h>
  22#include <net/dst.h>
  23#include <net/neighbour.h>
  24#include <net/pkt_sched.h>
  25
  26/*
  27   How to set it up.
  28   -----------------
  29
  30   After loading this module you will find a new device teqlN
  31   and new qdisc with the same name. To join a slave to the equalizer
  32   you should just set this qdisc on a device f.e.
  33
  34   # tc qdisc add dev eth0 root teql0
  35   # tc qdisc add dev eth1 root teql0
  36
  37   That's all. Full PnP 8)
  38
  39   Applicability.
  40   --------------
  41
  42   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  43      signal and generate EOI events. If you want to equalize virtual devices
  44      like tunnels, use a normal eql device.
  45   2. This device puts no limitations on physical slave characteristics
  46      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  47      Certainly, large difference in link speeds will make the resulting
  48      equalized link unusable, because of huge packet reordering.
  49      I estimate an upper useful difference as ~10 times.
  50   3. If the slave requires address resolution, only protocols using
  51      neighbour cache (IPv4/IPv6) will work over the equalized link.
  52      Other protocols are still allowed to use the slave device directly,
  53      which will not break load balancing, though native slave
  54      traffic will have the highest priority.  */
  55
  56struct teql_master {
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61        unsigned long   tx_bytes;
  62        unsigned long   tx_packets;
  63        unsigned long   tx_errors;
  64        unsigned long   tx_dropped;
  65};
  66
  67struct teql_sched_data {
  68        struct Qdisc *next;
  69        struct teql_master *m;
  70        struct sk_buff_head q;
  71};
  72
  73#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
  74
  75#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  76
  77/* "teql*" qdisc routines */
  78
  79static int
  80teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
  81{
  82        struct net_device *dev = qdisc_dev(sch);
  83        struct teql_sched_data *q = qdisc_priv(sch);
  84
  85        if (q->q.qlen < dev->tx_queue_len) {
  86                __skb_queue_tail(&q->q, skb);
  87                return NET_XMIT_SUCCESS;
  88        }
  89
  90        return qdisc_drop(skb, sch, to_free);
  91}
  92
  93static struct sk_buff *
  94teql_dequeue(struct Qdisc *sch)
  95{
  96        struct teql_sched_data *dat = qdisc_priv(sch);
  97        struct netdev_queue *dat_queue;
  98        struct sk_buff *skb;
  99        struct Qdisc *q;
 100
 101        skb = __skb_dequeue(&dat->q);
 102        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 103        q = rcu_dereference_bh(dat_queue->qdisc);
 104
 105        if (skb == NULL) {
 106                struct net_device *m = qdisc_dev(q);
 107                if (m) {
 108                        dat->m->slaves = sch;
 109                        netif_wake_queue(m);
 110                }
 111        } else {
 112                qdisc_bstats_update(sch, skb);
 113        }
 114        sch->q.qlen = dat->q.qlen + q->q.qlen;
 115        return skb;
 116}
 117
 118static struct sk_buff *
 119teql_peek(struct Qdisc *sch)
 120{
 121        /* teql is meant to be used as root qdisc */
 122        return NULL;
 123}
 124
 125static void
 126teql_reset(struct Qdisc *sch)
 127{
 128        struct teql_sched_data *dat = qdisc_priv(sch);
 129
 130        skb_queue_purge(&dat->q);
 131        sch->q.qlen = 0;
 132}
 133
 134static void
 135teql_destroy(struct Qdisc *sch)
 136{
 137        struct Qdisc *q, *prev;
 138        struct teql_sched_data *dat = qdisc_priv(sch);
 139        struct teql_master *master = dat->m;
 140
 141        if (!master)
 142                return;
 143
 144        prev = master->slaves;
 145        if (prev) {
 146                do {
 147                        q = NEXT_SLAVE(prev);
 148                        if (q == sch) {
 149                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 150                                if (q == master->slaves) {
 151                                        master->slaves = NEXT_SLAVE(q);
 152                                        if (q == master->slaves) {
 153                                                struct netdev_queue *txq;
 154                                                spinlock_t *root_lock;
 155
 156                                                txq = netdev_get_tx_queue(master->dev, 0);
 157                                                master->slaves = NULL;
 158
 159                                                root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
 160                                                spin_lock_bh(root_lock);
 161                                                qdisc_reset(rtnl_dereference(txq->qdisc));
 162                                                spin_unlock_bh(root_lock);
 163                                        }
 164                                }
 165                                skb_queue_purge(&dat->q);
 166                                break;
 167                        }
 168
 169                } while ((prev = q) != master->slaves);
 170        }
 171}
 172
 173static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 174                           struct netlink_ext_ack *extack)
 175{
 176        struct net_device *dev = qdisc_dev(sch);
 177        struct teql_master *m = (struct teql_master *)sch->ops;
 178        struct teql_sched_data *q = qdisc_priv(sch);
 179
 180        if (dev->hard_header_len > m->dev->hard_header_len)
 181                return -EINVAL;
 182
 183        if (m->dev == dev)
 184                return -ELOOP;
 185
 186        q->m = m;
 187
 188        skb_queue_head_init(&q->q);
 189
 190        if (m->slaves) {
 191                if (m->dev->flags & IFF_UP) {
 192                        if ((m->dev->flags & IFF_POINTOPOINT &&
 193                             !(dev->flags & IFF_POINTOPOINT)) ||
 194                            (m->dev->flags & IFF_BROADCAST &&
 195                             !(dev->flags & IFF_BROADCAST)) ||
 196                            (m->dev->flags & IFF_MULTICAST &&
 197                             !(dev->flags & IFF_MULTICAST)) ||
 198                            dev->mtu < m->dev->mtu)
 199                                return -EINVAL;
 200                } else {
 201                        if (!(dev->flags&IFF_POINTOPOINT))
 202                                m->dev->flags &= ~IFF_POINTOPOINT;
 203                        if (!(dev->flags&IFF_BROADCAST))
 204                                m->dev->flags &= ~IFF_BROADCAST;
 205                        if (!(dev->flags&IFF_MULTICAST))
 206                                m->dev->flags &= ~IFF_MULTICAST;
 207                        if (dev->mtu < m->dev->mtu)
 208                                m->dev->mtu = dev->mtu;
 209                }
 210                q->next = NEXT_SLAVE(m->slaves);
 211                NEXT_SLAVE(m->slaves) = sch;
 212        } else {
 213                q->next = sch;
 214                m->slaves = sch;
 215                m->dev->mtu = dev->mtu;
 216                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 217        }
 218        return 0;
 219}
 220
 221
 222static int
 223__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 224               struct net_device *dev, struct netdev_queue *txq,
 225               struct dst_entry *dst)
 226{
 227        struct neighbour *n;
 228        int err = 0;
 229
 230        n = dst_neigh_lookup_skb(dst, skb);
 231        if (!n)
 232                return -ENOENT;
 233
 234        if (dst->dev != dev) {
 235                struct neighbour *mn;
 236
 237                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
 238                neigh_release(n);
 239                if (IS_ERR(mn))
 240                        return PTR_ERR(mn);
 241                n = mn;
 242        }
 243
 244        if (neigh_event_send(n, skb_res) == 0) {
 245                int err;
 246                char haddr[MAX_ADDR_LEN];
 247
 248                neigh_ha_snapshot(haddr, n, dev);
 249                err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
 250                                      haddr, NULL, skb->len);
 251
 252                if (err < 0)
 253                        err = -EINVAL;
 254        } else {
 255                err = (skb_res == NULL) ? -EAGAIN : 1;
 256        }
 257        neigh_release(n);
 258        return err;
 259}
 260
 261static inline int teql_resolve(struct sk_buff *skb,
 262                               struct sk_buff *skb_res,
 263                               struct net_device *dev,
 264                               struct netdev_queue *txq)
 265{
 266        struct dst_entry *dst = skb_dst(skb);
 267        int res;
 268
 269        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
 270                return -ENODEV;
 271
 272        if (!dev->header_ops || !dst)
 273                return 0;
 274
 275        rcu_read_lock();
 276        res = __teql_resolve(skb, skb_res, dev, txq, dst);
 277        rcu_read_unlock();
 278
 279        return res;
 280}
 281
 282static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 283{
 284        struct teql_master *master = netdev_priv(dev);
 285        struct Qdisc *start, *q;
 286        int busy;
 287        int nores;
 288        int subq = skb_get_queue_mapping(skb);
 289        struct sk_buff *skb_res = NULL;
 290
 291        start = master->slaves;
 292
 293restart:
 294        nores = 0;
 295        busy = 0;
 296
 297        q = start;
 298        if (!q)
 299                goto drop;
 300
 301        do {
 302                struct net_device *slave = qdisc_dev(q);
 303                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 304
 305                if (slave_txq->qdisc_sleeping != q)
 306                        continue;
 307                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 308                    !netif_running(slave)) {
 309                        busy = 1;
 310                        continue;
 311                }
 312
 313                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
 314                case 0:
 315                        if (__netif_tx_trylock(slave_txq)) {
 316                                unsigned int length = qdisc_pkt_len(skb);
 317
 318                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 319                                    netdev_start_xmit(skb, slave, slave_txq, false) ==
 320                                    NETDEV_TX_OK) {
 321                                        __netif_tx_unlock(slave_txq);
 322                                        master->slaves = NEXT_SLAVE(q);
 323                                        netif_wake_queue(dev);
 324                                        master->tx_packets++;
 325                                        master->tx_bytes += length;
 326                                        return NETDEV_TX_OK;
 327                                }
 328                                __netif_tx_unlock(slave_txq);
 329                        }
 330                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 331                                busy = 1;
 332                        break;
 333                case 1:
 334                        master->slaves = NEXT_SLAVE(q);
 335                        return NETDEV_TX_OK;
 336                default:
 337                        nores = 1;
 338                        break;
 339                }
 340                __skb_pull(skb, skb_network_offset(skb));
 341        } while ((q = NEXT_SLAVE(q)) != start);
 342
 343        if (nores && skb_res == NULL) {
 344                skb_res = skb;
 345                goto restart;
 346        }
 347
 348        if (busy) {
 349                netif_stop_queue(dev);
 350                return NETDEV_TX_BUSY;
 351        }
 352        master->tx_errors++;
 353
 354drop:
 355        master->tx_dropped++;
 356        dev_kfree_skb(skb);
 357        return NETDEV_TX_OK;
 358}
 359
 360static int teql_master_open(struct net_device *dev)
 361{
 362        struct Qdisc *q;
 363        struct teql_master *m = netdev_priv(dev);
 364        int mtu = 0xFFFE;
 365        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 366
 367        if (m->slaves == NULL)
 368                return -EUNATCH;
 369
 370        flags = FMASK;
 371
 372        q = m->slaves;
 373        do {
 374                struct net_device *slave = qdisc_dev(q);
 375
 376                if (slave == NULL)
 377                        return -EUNATCH;
 378
 379                if (slave->mtu < mtu)
 380                        mtu = slave->mtu;
 381                if (slave->hard_header_len > LL_MAX_HEADER)
 382                        return -EINVAL;
 383
 384                /* If all the slaves are BROADCAST, master is BROADCAST
 385                   If all the slaves are PtP, master is PtP
 386                   Otherwise, master is NBMA.
 387                 */
 388                if (!(slave->flags&IFF_POINTOPOINT))
 389                        flags &= ~IFF_POINTOPOINT;
 390                if (!(slave->flags&IFF_BROADCAST))
 391                        flags &= ~IFF_BROADCAST;
 392                if (!(slave->flags&IFF_MULTICAST))
 393                        flags &= ~IFF_MULTICAST;
 394        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 395
 396        m->dev->mtu = mtu;
 397        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 398        netif_start_queue(m->dev);
 399        return 0;
 400}
 401
 402static int teql_master_close(struct net_device *dev)
 403{
 404        netif_stop_queue(dev);
 405        return 0;
 406}
 407
 408static void teql_master_stats64(struct net_device *dev,
 409                                struct rtnl_link_stats64 *stats)
 410{
 411        struct teql_master *m = netdev_priv(dev);
 412
 413        stats->tx_packets       = m->tx_packets;
 414        stats->tx_bytes         = m->tx_bytes;
 415        stats->tx_errors        = m->tx_errors;
 416        stats->tx_dropped       = m->tx_dropped;
 417}
 418
 419static int teql_master_mtu(struct net_device *dev, int new_mtu)
 420{
 421        struct teql_master *m = netdev_priv(dev);
 422        struct Qdisc *q;
 423
 424        q = m->slaves;
 425        if (q) {
 426                do {
 427                        if (new_mtu > qdisc_dev(q)->mtu)
 428                                return -EINVAL;
 429                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 430        }
 431
 432        dev->mtu = new_mtu;
 433        return 0;
 434}
 435
 436static const struct net_device_ops teql_netdev_ops = {
 437        .ndo_open       = teql_master_open,
 438        .ndo_stop       = teql_master_close,
 439        .ndo_start_xmit = teql_master_xmit,
 440        .ndo_get_stats64 = teql_master_stats64,
 441        .ndo_change_mtu = teql_master_mtu,
 442};
 443
 444static __init void teql_master_setup(struct net_device *dev)
 445{
 446        struct teql_master *master = netdev_priv(dev);
 447        struct Qdisc_ops *ops = &master->qops;
 448
 449        master->dev     = dev;
 450        ops->priv_size  = sizeof(struct teql_sched_data);
 451
 452        ops->enqueue    =       teql_enqueue;
 453        ops->dequeue    =       teql_dequeue;
 454        ops->peek       =       teql_peek;
 455        ops->init       =       teql_qdisc_init;
 456        ops->reset      =       teql_reset;
 457        ops->destroy    =       teql_destroy;
 458        ops->owner      =       THIS_MODULE;
 459
 460        dev->netdev_ops =       &teql_netdev_ops;
 461        dev->type               = ARPHRD_VOID;
 462        dev->mtu                = 1500;
 463        dev->min_mtu            = 68;
 464        dev->max_mtu            = 65535;
 465        dev->tx_queue_len       = 100;
 466        dev->flags              = IFF_NOARP;
 467        dev->hard_header_len    = LL_MAX_HEADER;
 468        netif_keep_dst(dev);
 469}
 470
 471static LIST_HEAD(master_dev_list);
 472static int max_equalizers = 1;
 473module_param(max_equalizers, int, 0);
 474MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 475
 476static int __init teql_init(void)
 477{
 478        int i;
 479        int err = -ENODEV;
 480
 481        for (i = 0; i < max_equalizers; i++) {
 482                struct net_device *dev;
 483                struct teql_master *master;
 484
 485                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
 486                                   NET_NAME_UNKNOWN, teql_master_setup);
 487                if (!dev) {
 488                        err = -ENOMEM;
 489                        break;
 490                }
 491
 492                if ((err = register_netdev(dev))) {
 493                        free_netdev(dev);
 494                        break;
 495                }
 496
 497                master = netdev_priv(dev);
 498
 499                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 500                err = register_qdisc(&master->qops);
 501
 502                if (err) {
 503                        unregister_netdev(dev);
 504                        free_netdev(dev);
 505                        break;
 506                }
 507
 508                list_add_tail(&master->master_list, &master_dev_list);
 509        }
 510        return i ? 0 : err;
 511}
 512
 513static void __exit teql_exit(void)
 514{
 515        struct teql_master *master, *nxt;
 516
 517        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 518
 519                list_del(&master->master_list);
 520
 521                unregister_qdisc(&master->qops);
 522                unregister_netdev(master->dev);
 523                free_netdev(master->dev);
 524        }
 525}
 526
 527module_init(teql_init);
 528module_exit(teql_exit);
 529
 530MODULE_LICENSE("GPL");
 531