// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you just set this qdisc as root on the device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference at roughly 10 times.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

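/* Per-equalizer state: the virtual master device, the circular list of
 * slave qdiscs it round-robins over, and software tx counters reported
 * via ndo_get_stats64.
 */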
struct teql_master {
        struct Qdisc_ops qops;
        struct net_device *dev;
        struct Qdisc *slaves;
        struct list_head master_list;
        unsigned long   tx_bytes;
        unsigned long   tx_packets;
        unsigned long   tx_errors;
        unsigned long   tx_dropped;
};

struct teql_sched_data {
        struct Qdisc *next;
        struct teql_master *m;
        struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

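/* Queue the packet on this slave's private FIFO.  The queue is bounded
 * by the slave device's tx_queue_len; anything beyond that is dropped.
 */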
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_sched_data *q = qdisc_priv(sch);

        if (q->q.qlen < dev->tx_queue_len) {
                __skb_queue_tail(&q->q, skb);
                return NET_XMIT_SUCCESS;
        }

        return qdisc_drop(skb, sch, to_free);
}

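/* Pop the next packet for this slave.  When the FIFO runs empty, make
 * this qdisc the starting point of the next round-robin pass and wake
 * the master device's queue so teql_master_xmit can run again.
 */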
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct netdev_queue *dat_queue;
        struct sk_buff *skb;
        struct Qdisc *q;

        skb = __skb_dequeue(&dat->q);
        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
        q = rcu_dereference_bh(dat_queue->qdisc);

        if (skb == NULL) {
                struct net_device *m = qdisc_dev(q);
                if (m) {
                        dat->m->slaves = sch;
                        netif_wake_queue(m);
                }
        } else {
                qdisc_bstats_update(sch, skb);
        }
        sch->q.qlen = dat->q.qlen + q->q.qlen;
        return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
        /* teql is meant to be used as root qdisc */
        return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);

        skb_queue_purge(&dat->q);
        sch->q.qlen = 0;
}

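/* Unlink this qdisc from its master's circular slave list.  When the
 * last slave goes away, clear the list and reset the master's own root
 * qdisc under its lock.
 */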
static void
teql_destroy(struct Qdisc *sch)
{
        struct Qdisc *q, *prev;
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct teql_master *master = dat->m;

        prev = master->slaves;
        if (prev) {
                do {
                        q = NEXT_SLAVE(prev);
                        if (q == sch) {
                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                                if (q == master->slaves) {
                                        master->slaves = NEXT_SLAVE(q);
                                        if (q == master->slaves) {
                                                struct netdev_queue *txq;
                                                spinlock_t *root_lock;

                                                txq = netdev_get_tx_queue(master->dev, 0);
                                                master->slaves = NULL;

                                                root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
                                                spin_lock_bh(root_lock);
                                                qdisc_reset(rtnl_dereference(txq->qdisc));
                                                spin_unlock_bh(root_lock);
                                        }
                                }
                                skb_queue_purge(&dat->q);
                                break;
                        }

                } while ((prev = q) != master->slaves);
        }
}

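/* Attach a new slave.  Reject loops and slaves with link-layer headers
 * larger than the master's, then splice the qdisc into the master's
 * circular list.  While the master is up, a slave must be at least as
 * capable (flags, MTU) as the master; while it is down, the master's
 * flags and MTU are adjusted down to what the slave supports.
 */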
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
                           struct netlink_ext_ack *extack)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_master *m = (struct teql_master *)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);

        if (dev->hard_header_len > m->dev->hard_header_len)
                return -EINVAL;

        if (m->dev == dev)
                return -ELOOP;

        q->m = m;

        skb_queue_head_init(&q->q);

        if (m->slaves) {
                if (m->dev->flags & IFF_UP) {
                        if ((m->dev->flags & IFF_POINTOPOINT &&
                             !(dev->flags & IFF_POINTOPOINT)) ||
                            (m->dev->flags & IFF_BROADCAST &&
                             !(dev->flags & IFF_BROADCAST)) ||
                            (m->dev->flags & IFF_MULTICAST &&
                             !(dev->flags & IFF_MULTICAST)) ||
                            dev->mtu < m->dev->mtu)
                                return -EINVAL;
                } else {
                        if (!(dev->flags & IFF_POINTOPOINT))
                                m->dev->flags &= ~IFF_POINTOPOINT;
                        if (!(dev->flags & IFF_BROADCAST))
                                m->dev->flags &= ~IFF_BROADCAST;
                        if (!(dev->flags & IFF_MULTICAST))
                                m->dev->flags &= ~IFF_MULTICAST;
                        if (dev->mtu < m->dev->mtu)
                                m->dev->mtu = dev->mtu;
                }
                q->next = NEXT_SLAVE(m->slaves);
                NEXT_SLAVE(m->slaves) = sch;
        } else {
                q->next = sch;
                m->slaves = sch;
                m->dev->mtu = dev->mtu;
                m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
        }
        return 0;
}

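/* Build the link-layer header for @skb on slave @dev via the neighbour
 * cache.  Returns 0 once the header is filled in, 1 when the packet was
 * queued on a pending neighbour event (consumed), and a negative errno
 * on failure.
 */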
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
               struct net_device *dev, struct netdev_queue *txq,
               struct dst_entry *dst)
{
        struct neighbour *n;
        int err = 0;

        n = dst_neigh_lookup_skb(dst, skb);
        if (!n)
                return -ENOENT;

        if (dst->dev != dev) {
                struct neighbour *mn;

                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
                neigh_release(n);
                if (IS_ERR(mn))
                        return PTR_ERR(mn);
                n = mn;
        }

        if (neigh_event_send(n, skb_res) == 0) {
                char haddr[MAX_ADDR_LEN];

                neigh_ha_snapshot(haddr, n, dev);
                err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
                                      haddr, NULL, skb->len);

                /* dev_hard_header() returns the header length on success;
                 * map it to 0 ("resolved") and any failure to -EINVAL.
                 */
                err = (err < 0) ? -EINVAL : 0;
        } else {
                err = (skb_res == NULL) ? -EAGAIN : 1;
        }
        neigh_release(n);
        return err;
}

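/* Resolve under RCU.  A slave whose root qdisc has been replaced by
 * noop_qdisc (the device is being torn down) reports -ENODEV.  Slaves
 * without header_ops, and packets without a dst, need no resolution.
 */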
static inline int teql_resolve(struct sk_buff *skb,
                               struct sk_buff *skb_res,
                               struct net_device *dev,
                               struct netdev_queue *txq)
{
        struct dst_entry *dst = skb_dst(skb);
        int res;

        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
                return -ENODEV;

        if (!dev->header_ops || !dst)
                return 0;

        rcu_read_lock();
        res = __teql_resolve(skb, skb_res, dev, txq, dst);
        rcu_read_unlock();

        return res;
}

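/* Round-robin transmit.  Starting from the most recently idle slave,
 * find one that is running and not stopped, resolve the link-layer
 * header for it, and push the skb straight onto its hardware queue,
 * bypassing the slave's own qdisc.  If every usable slave is busy,
 * stop the master queue and ask the stack to requeue (NETDEV_TX_BUSY);
 * otherwise count an error and drop the packet.
 */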
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc *start, *q;
        int busy;
        int nores;
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        q = start;
        if (!q)
                goto drop;

        do {
                struct net_device *slave = qdisc_dev(q);
                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

                if (slave_txq->qdisc_sleeping != q)
                        continue;
                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
                case 0:
                        if (__netif_tx_trylock(slave_txq)) {
                                unsigned int length = qdisc_pkt_len(skb);

                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
                                    netdev_start_xmit(skb, slave, slave_txq, false) ==
                                    NETDEV_TX_OK) {
                                        __netif_tx_unlock(slave_txq);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->tx_packets++;
                                        master->tx_bytes += length;
                                        return NETDEV_TX_OK;
                                }
                                __netif_tx_unlock(slave_txq);
                        }
                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
                                busy = 1;
                        break;
                case 1:
                        master->slaves = NEXT_SLAVE(q);
                        return NETDEV_TX_OK;
                default:
                        nores = 1;
                        break;
                }
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        if (nores && skb_res == NULL) {
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;
        }
        master->tx_errors++;

drop:
        master->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

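/* The master can only come up once it has slaves.  Its MTU becomes the
 * minimum slave MTU, and it advertises only the BROADCAST, POINTOPOINT
 * and MULTICAST flags that every slave supports.
 */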
static int teql_master_open(struct net_device *dev)
{
        struct Qdisc *q;
        struct teql_master *m = netdev_priv(dev);
        int mtu = 0xFFFE;
        unsigned int flags = IFF_NOARP | IFF_MULTICAST;

        if (m->slaves == NULL)
                return -EUNATCH;

        flags = FMASK | IFF_MULTICAST;

        q = m->slaves;
        do {
                struct net_device *slave = qdisc_dev(q);

                if (slave == NULL)
                        return -EUNATCH;

                if (slave->mtu < mtu)
                        mtu = slave->mtu;
                if (slave->hard_header_len > LL_MAX_HEADER)
                        return -EINVAL;

                /* If all the slaves are BROADCAST, master is BROADCAST
                   If all the slaves are PtP, master is PtP
                   Otherwise, master is NBMA.
                 */
                if (!(slave->flags & IFF_POINTOPOINT))
                        flags &= ~IFF_POINTOPOINT;
                if (!(slave->flags & IFF_BROADCAST))
                        flags &= ~IFF_BROADCAST;
                if (!(slave->flags & IFF_MULTICAST))
                        flags &= ~IFF_MULTICAST;
        } while ((q = NEXT_SLAVE(q)) != m->slaves);

        m->dev->mtu = mtu;
        m->dev->flags = (m->dev->flags & ~FMASK) | flags;
        netif_start_queue(m->dev);
        return 0;
}

static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static void teql_master_stats64(struct net_device *dev,
                                struct rtnl_link_stats64 *stats)
{
        struct teql_master *m = netdev_priv(dev);

        stats->tx_packets       = m->tx_packets;
        stats->tx_bytes         = m->tx_bytes;
        stats->tx_errors        = m->tx_errors;
        stats->tx_dropped       = m->tx_dropped;
}

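/* The master's MTU may never exceed that of any current slave. */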
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
        struct teql_master *m = netdev_priv(dev);
        struct Qdisc *q;

        q = m->slaves;
        if (q) {
                do {
                        if (new_mtu > qdisc_dev(q)->mtu)
                                return -EINVAL;
                } while ((q = NEXT_SLAVE(q)) != m->slaves);
        }

        dev->mtu = new_mtu;
        return 0;
}

static const struct net_device_ops teql_netdev_ops = {
        .ndo_open       = teql_master_open,
        .ndo_stop       = teql_master_close,
        .ndo_start_xmit = teql_master_xmit,
        .ndo_get_stats64 = teql_master_stats64,
        .ndo_change_mtu = teql_master_mtu,
};

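/* Wire one master netdev to its private qdisc ops.  Each teqlN device
 * carries its own struct Qdisc_ops whose id is set to the device name
 * in teql_init(), so "tc qdisc add dev ethX root teql0" enslaves ethX
 * to master teql0.
 */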
static __init void teql_master_setup(struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc_ops *ops = &master->qops;

        master->dev     = dev;
        ops->priv_size  = sizeof(struct teql_sched_data);

        ops->enqueue    =       teql_enqueue;
        ops->dequeue    =       teql_dequeue;
        ops->peek       =       teql_peek;
        ops->init       =       teql_qdisc_init;
        ops->reset      =       teql_reset;
        ops->destroy    =       teql_destroy;
        ops->owner      =       THIS_MODULE;

        dev->netdev_ops =       &teql_netdev_ops;
        dev->type               = ARPHRD_VOID;
        dev->mtu                = 1500;
        dev->min_mtu            = 68;
        dev->max_mtu            = 65535;
        dev->tx_queue_len       = 100;
        dev->flags              = IFF_NOARP;
        dev->hard_header_len    = LL_MAX_HEADER;
        netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

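/* Create max_equalizers master devices ("teql0", "teql1", ...) and
 * register a matching qdisc type for each.  Module load succeeds as
 * long as at least one master was set up.
 */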
static int __init teql_init(void)
{
        int i;
        int err = -ENODEV;

        for (i = 0; i < max_equalizers; i++) {
                struct net_device *dev;
                struct teql_master *master;

                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
                                   NET_NAME_UNKNOWN, teql_master_setup);
                if (!dev) {
                        err = -ENOMEM;
                        break;
                }

                err = register_netdev(dev);
                if (err) {
                        free_netdev(dev);
                        break;
                }

                master = netdev_priv(dev);

                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
                err = register_qdisc(&master->qops);

                if (err) {
                        unregister_netdev(dev);
                        free_netdev(dev);
                        break;
                }

                list_add_tail(&master->master_list, &master_dev_list);
        }
        return i ? 0 : err;
}

static void __exit teql_exit(void)
{
        struct teql_master *master, *nxt;

        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
                list_del(&master->master_list);

                unregister_qdisc(&master->qops);
                unregister_netdev(master->dev);
                free_netdev(master->dev);
        }
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");