   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   3 *
   4 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   5 */
   6
   7#include <linux/module.h>
   8#include <linux/types.h>
   9#include <linux/kernel.h>
  10#include <linux/slab.h>
  11#include <linux/string.h>
  12#include <linux/errno.h>
  13#include <linux/if_arp.h>
  14#include <linux/netdevice.h>
  15#include <linux/init.h>
  16#include <linux/skbuff.h>
  17#include <linux/moduleparam.h>
  18#include <net/dst.h>
  19#include <net/neighbour.h>
  20#include <net/pkt_sched.h>
  21
  22/*
  23   How to setup it.
  24   ----------------
  25
  26   After loading this module you will find a new device teqlN
  27   and new qdisc with the same name. To join a slave to the equalizer
  28   you should just set this qdisc on a device f.e.
  29
  30   # tc qdisc add dev eth0 root teql0
  31   # tc qdisc add dev eth1 root teql0
  32
  33   That's all. Full PnP 8)
  34
  35   Applicability.
  36   --------------
  37
  38   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  39      signal and generate EOI events. If you want to equalize virtual devices
  40      like tunnels, use a normal eql device.
  41   2. This device puts no limitations on physical slave characteristics
  42      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  43      Certainly, large difference in link speeds will make the resulting
  44      eqalized link unusable, because of huge packet reordering.
  45      I estimate an upper useful difference as ~10 times.
  46   3. If the slave requires address resolution, only protocols using
  47      neighbour cache (IPv4/IPv6) will work over the equalized link.
  48      Other protocols are still allowed to use the slave device directly,
  49      which will not break load balancing, though native slave
  50      traffic will have the highest priority.  */
  51
/* One teql master: a virtual teqlN net_device plus the dynamically
 * registered Qdisc_ops (id == device name) used to enslave real
 * devices to it.
 */
struct teql_master {
	struct Qdisc_ops qops;		/* per-master qdisc ops; id set from dev->name */
	struct net_device *dev;		/* the teqlN virtual device */
	struct Qdisc *slaves;		/* circular slave list; also the round-robin cursor */
	struct list_head master_list;	/* link in module-global master_dev_list */
	unsigned long	tx_bytes;	/* TX counters reported via ndo_get_stats64 */
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};
  62
/* Private data of one slave qdisc instance. */
struct teql_sched_data {
	struct Qdisc *next;		/* next slave in the master's circular list */
	struct teql_master *m;		/* owning master (set in ->init) */
	struct sk_buff_head q;		/* per-slave packet queue */
};

/* Follow the circular slave list embedded in each slave's priv data. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flag bits the master mirrors from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  72
  73/* "teql*" qdisc routines */
  74
  75static int
  76teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
  77{
  78        struct net_device *dev = qdisc_dev(sch);
  79        struct teql_sched_data *q = qdisc_priv(sch);
  80
  81        if (q->q.qlen < dev->tx_queue_len) {
  82                __skb_queue_tail(&q->q, skb);
  83                return NET_XMIT_SUCCESS;
  84        }
  85
  86        return qdisc_drop(skb, sch, to_free);
  87}
  88
/* Qdisc ->dequeue: pop one skb from this slave's local queue.  When the
 * slave runs dry, rotate the master's round-robin cursor to this slave
 * and wake the master device so it refills us first.  sch->q.qlen is
 * kept equal to the local backlog plus the master root qdisc's backlog.
 */
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	/* Master's root qdisc; RCU-protected, we run in BH context. */
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);
		if (m) {
			/* Empty: make this slave the next dequeue target
			 * and let the master push more packets our way.
			 */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	/* Export the combined backlog (local queue + master root qdisc). */
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}
 113
 114static struct sk_buff *
 115teql_peek(struct Qdisc *sch)
 116{
 117        /* teql is meant to be used as root qdisc */
 118        return NULL;
 119}
 120
 121static void
 122teql_reset(struct Qdisc *sch)
 123{
 124        struct teql_sched_data *dat = qdisc_priv(sch);
 125
 126        skb_queue_purge(&dat->q);
 127        sch->q.qlen = 0;
 128}
 129
/* Qdisc ->destroy: unlink this slave from its master's circular slave
 * list.  When the last slave goes away, clear the master's slave
 * pointer and reset the master's root qdisc under its root lock.
 */
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	/* ->destroy may run before ->init ever attached us to a master. */
	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Unlink sch from the circular list. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					/* Still pointing at ourselves: sch was
					 * the only remaining slave.
					 */
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}
 168
/* Qdisc ->init: attach this qdisc (one per slave device) to the teql
 * master whose embedded Qdisc_ops created it.  Returns -EINVAL for an
 * incompatible slave and -ELOOP when the master is enslaved to itself.
 */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	/* qops is the first member of teql_master, so ops -> container. */
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	/* The slave's link-layer header must fit in the headroom the
	 * master reserves (LL_MAX_HEADER, see teql_master_setup).
	 */
	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	/* Enslaving the master to itself would loop packets forever. */
	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is up: a new slave may not weaken any
			 * capability (flags, MTU) the master advertises.
			 */
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: shrink its flags/MTU to the
			 * intersection with the new slave.
			 */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Insert into the circular slave list after the head. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: a list of one; master inherits MTU/flags. */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}
 216
 217
 218static int
 219__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 220               struct net_device *dev, struct netdev_queue *txq,
 221               struct dst_entry *dst)
 222{
 223        struct neighbour *n;
 224        int err = 0;
 225
 226        n = dst_neigh_lookup_skb(dst, skb);
 227        if (!n)
 228                return -ENOENT;
 229
 230        if (dst->dev != dev) {
 231                struct neighbour *mn;
 232
 233                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
 234                neigh_release(n);
 235                if (IS_ERR(mn))
 236                        return PTR_ERR(mn);
 237                n = mn;
 238        }
 239
 240        if (neigh_event_send(n, skb_res) == 0) {
 241                int err;
 242                char haddr[MAX_ADDR_LEN];
 243
 244                neigh_ha_snapshot(haddr, n, dev);
 245                err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
 246                                      haddr, NULL, skb->len);
 247
 248                if (err < 0)
 249                        err = -EINVAL;
 250        } else {
 251                err = (skb_res == NULL) ? -EAGAIN : 1;
 252        }
 253        neigh_release(n);
 254        return err;
 255}
 256
 257static inline int teql_resolve(struct sk_buff *skb,
 258                               struct sk_buff *skb_res,
 259                               struct net_device *dev,
 260                               struct netdev_queue *txq)
 261{
 262        struct dst_entry *dst = skb_dst(skb);
 263        int res;
 264
 265        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
 266                return -ENODEV;
 267
 268        if (!dev->header_ops || !dst)
 269                return 0;
 270
 271        rcu_read_lock();
 272        res = __teql_resolve(skb, skb_res, dev, txq, dst);
 273        rcu_read_unlock();
 274
 275        return res;
 276}
 277
/* ndo_start_xmit for the master device: walk the circular slave list
 * round-robin, trying to resolve a link-layer header for and transmit
 * skb on each slave in turn.  On success the round-robin cursor
 * advances past the slave that took the packet.  If every usable slave
 * was busy, stop the master queue and return NETDEV_TX_BUSY so the
 * stack requeues the skb.
 */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		/* Skip slaves whose root qdisc is no longer this one. */
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			/* Header resolved: try a direct transmit on the
			 * slave's TX queue without going through its qdisc.
			 */
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					/* Advance round-robin past this slave. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			/* skb was taken by the neighbour layer (second,
			 * skb_res pass) — it is no longer ours to free.
			 */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			/* Resolution failed on this slave. */
			nores = 1;
			break;
		}
		/* Undo any link-layer header pushed by a failed attempt. */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* Second pass: allow the neighbour code to queue the skb. */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
 355
 356static int teql_master_open(struct net_device *dev)
 357{
 358        struct Qdisc *q;
 359        struct teql_master *m = netdev_priv(dev);
 360        int mtu = 0xFFFE;
 361        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 362
 363        if (m->slaves == NULL)
 364                return -EUNATCH;
 365
 366        flags = FMASK;
 367
 368        q = m->slaves;
 369        do {
 370                struct net_device *slave = qdisc_dev(q);
 371
 372                if (slave == NULL)
 373                        return -EUNATCH;
 374
 375                if (slave->mtu < mtu)
 376                        mtu = slave->mtu;
 377                if (slave->hard_header_len > LL_MAX_HEADER)
 378                        return -EINVAL;
 379
 380                /* If all the slaves are BROADCAST, master is BROADCAST
 381                   If all the slaves are PtP, master is PtP
 382                   Otherwise, master is NBMA.
 383                 */
 384                if (!(slave->flags&IFF_POINTOPOINT))
 385                        flags &= ~IFF_POINTOPOINT;
 386                if (!(slave->flags&IFF_BROADCAST))
 387                        flags &= ~IFF_BROADCAST;
 388                if (!(slave->flags&IFF_MULTICAST))
 389                        flags &= ~IFF_MULTICAST;
 390        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 391
 392        m->dev->mtu = mtu;
 393        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 394        netif_start_queue(m->dev);
 395        return 0;
 396}
 397
 398static int teql_master_close(struct net_device *dev)
 399{
 400        netif_stop_queue(dev);
 401        return 0;
 402}
 403
 404static void teql_master_stats64(struct net_device *dev,
 405                                struct rtnl_link_stats64 *stats)
 406{
 407        struct teql_master *m = netdev_priv(dev);
 408
 409        stats->tx_packets       = m->tx_packets;
 410        stats->tx_bytes         = m->tx_bytes;
 411        stats->tx_errors        = m->tx_errors;
 412        stats->tx_dropped       = m->tx_dropped;
 413}
 414
 415static int teql_master_mtu(struct net_device *dev, int new_mtu)
 416{
 417        struct teql_master *m = netdev_priv(dev);
 418        struct Qdisc *q;
 419
 420        q = m->slaves;
 421        if (q) {
 422                do {
 423                        if (new_mtu > qdisc_dev(q)->mtu)
 424                                return -EINVAL;
 425                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 426        }
 427
 428        dev->mtu = new_mtu;
 429        return 0;
 430}
 431
/* net_device callbacks for the teqlN master device. */
static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};
 439
 440static __init void teql_master_setup(struct net_device *dev)
 441{
 442        struct teql_master *master = netdev_priv(dev);
 443        struct Qdisc_ops *ops = &master->qops;
 444
 445        master->dev     = dev;
 446        ops->priv_size  = sizeof(struct teql_sched_data);
 447
 448        ops->enqueue    =       teql_enqueue;
 449        ops->dequeue    =       teql_dequeue;
 450        ops->peek       =       teql_peek;
 451        ops->init       =       teql_qdisc_init;
 452        ops->reset      =       teql_reset;
 453        ops->destroy    =       teql_destroy;
 454        ops->owner      =       THIS_MODULE;
 455
 456        dev->netdev_ops =       &teql_netdev_ops;
 457        dev->type               = ARPHRD_VOID;
 458        dev->mtu                = 1500;
 459        dev->min_mtu            = 68;
 460        dev->max_mtu            = 65535;
 461        dev->tx_queue_len       = 100;
 462        dev->flags              = IFF_NOARP;
 463        dev->hard_header_len    = LL_MAX_HEADER;
 464        netif_keep_dst(dev);
 465}
 466
/* All masters created by teql_init(); only touched at module init/exit. */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices (and matching qdiscs) created at module load. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 471
 472static int __init teql_init(void)
 473{
 474        int i;
 475        int err = -ENODEV;
 476
 477        for (i = 0; i < max_equalizers; i++) {
 478                struct net_device *dev;
 479                struct teql_master *master;
 480
 481                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
 482                                   NET_NAME_UNKNOWN, teql_master_setup);
 483                if (!dev) {
 484                        err = -ENOMEM;
 485                        break;
 486                }
 487
 488                if ((err = register_netdev(dev))) {
 489                        free_netdev(dev);
 490                        break;
 491                }
 492
 493                master = netdev_priv(dev);
 494
 495                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 496                err = register_qdisc(&master->qops);
 497
 498                if (err) {
 499                        unregister_netdev(dev);
 500                        free_netdev(dev);
 501                        break;
 502                }
 503
 504                list_add_tail(&master->master_list, &master_dev_list);
 505        }
 506        return i ? 0 : err;
 507}
 508
/* Module exit: tear down every master created by teql_init().  The
 * qdisc ops live inside the netdev's private area, so the qdisc is
 * unregistered before the netdev is unregistered and freed.
 */
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}
 522
 523module_init(teql_init);
 524module_exit(teql_exit);
 525
 526MODULE_LICENSE("GPL");
 527