linux/net/sched/sch_cbs.c
<<
>>
Prefs
   1/*
   2 * net/sched/sch_cbs.c  Credit Based Shaper
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Vinicius Costa Gomes <vinicius.gomes@intel.com>
  10 *
  11 */
  12
  13/* Credit Based Shaper (CBS)
  14 * =========================
  15 *
  16 * This is a simple rate-limiting shaper aimed at TSN applications on
  17 * systems with known traffic workloads.
  18 *
  19 * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
  20 * Section 8.6.8.2, and explained in more detail in the Annex L of the
  21 * same specification.
  22 *
  23 * There are four tunables to be considered:
  24 *
  25 *      'idleslope': Idleslope is the rate of credits that is
  26 *      accumulated (in kilobits per second) when there is at least
  27 *      one packet waiting for transmission. Packets are transmitted
  28 *      when the current value of credits is equal or greater than
  29 *      zero. When there is no packet to be transmitted the amount of
  30 *      credits is set to zero. This is the main tunable of the CBS
  31 *      algorithm.
  32 *
  33 *      'sendslope':
  34 *      Sendslope is the rate of credits that is depleted (it should be a
  35 *      negative number of kilobits per second) when a transmission is
  36 *      occurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
  37 *      8.6.8.2 item g):
  38 *
  39 *      sendslope = idleslope - port_transmit_rate
  40 *
  41 *      'hicredit': Hicredit defines the maximum amount of credits (in
  42 *      bytes) that can be accumulated. Hicredit depends on the
  43 *      characteristics of interfering traffic,
  44 *      'max_interference_size' is the maximum size of any burst of
  45 *      traffic that can delay the transmission of a frame that is
  46 *      available for transmission for this traffic class, (IEEE
  47 *      802.1Q-2014 Annex L, Equation L-3):
  48 *
  49 *      hicredit = max_interference_size * (idleslope / port_transmit_rate)
  50 *
  51 *      'locredit': Locredit is the minimum amount of credits that can
  52 *      be reached. It is a function of the traffic flowing through
  53 *      this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
  54 *
  55 *      locredit = max_frame_size * (sendslope / port_transmit_rate)
  56 */
  57
  58#include <linux/module.h>
  59#include <linux/types.h>
  60#include <linux/kernel.h>
  61#include <linux/string.h>
  62#include <linux/errno.h>
  63#include <linux/skbuff.h>
  64#include <net/netlink.h>
  65#include <net/sch_generic.h>
  66#include <net/pkt_sched.h>
  67
  68#define BYTES_PER_KBIT (1000LL / 8)
  69
  70struct cbs_sched_data {
  71        bool offload;
  72        int queue;
  73        s64 port_rate; /* in bytes/s */
  74        s64 last; /* timestamp in ns */
  75        s64 credits; /* in bytes */
  76        s32 locredit; /* in bytes */
  77        s32 hicredit; /* in bytes */
  78        s64 sendslope; /* in bytes/s */
  79        s64 idleslope; /* in bytes/s */
  80        struct qdisc_watchdog watchdog;
  81        int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
  82        struct sk_buff *(*dequeue)(struct Qdisc *sch);
  83};
  84
  85static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
  86{
  87        return qdisc_enqueue_tail(skb, sch);
  88}
  89
  90static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
  91{
  92        struct cbs_sched_data *q = qdisc_priv(sch);
  93
  94        if (sch->q.qlen == 0 && q->credits > 0) {
  95                /* We need to stop accumulating credits when there's
  96                 * no enqueued packets and q->credits is positive.
  97                 */
  98                q->credits = 0;
  99                q->last = ktime_get_ns();
 100        }
 101
 102        return qdisc_enqueue_tail(skb, sch);
 103}
 104
 105static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 106                       struct sk_buff **to_free)
 107{
 108        struct cbs_sched_data *q = qdisc_priv(sch);
 109
 110        return q->enqueue(skb, sch);
 111}
 112
 113/* timediff is in ns, slope is in bytes/s */
 114static s64 timediff_to_credits(s64 timediff, s64 slope)
 115{
 116        return div64_s64(timediff * slope, NSEC_PER_SEC);
 117}
 118
 119static s64 delay_from_credits(s64 credits, s64 slope)
 120{
 121        if (unlikely(slope == 0))
 122                return S64_MAX;
 123
 124        return div64_s64(-credits * NSEC_PER_SEC, slope);
 125}
 126
 127static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
 128{
 129        if (unlikely(port_rate == 0))
 130                return S64_MAX;
 131
 132        return div64_s64(len * slope, port_rate);
 133}
 134
 135static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
 136{
 137        struct cbs_sched_data *q = qdisc_priv(sch);
 138        s64 now = ktime_get_ns();
 139        struct sk_buff *skb;
 140        s64 credits;
 141        int len;
 142
 143        if (q->credits < 0) {
 144                credits = timediff_to_credits(now - q->last, q->idleslope);
 145
 146                credits = q->credits + credits;
 147                q->credits = min_t(s64, credits, q->hicredit);
 148
 149                if (q->credits < 0) {
 150                        s64 delay;
 151
 152                        delay = delay_from_credits(q->credits, q->idleslope);
 153                        qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
 154
 155                        q->last = now;
 156
 157                        return NULL;
 158                }
 159        }
 160
 161        skb = qdisc_dequeue_head(sch);
 162        if (!skb)
 163                return NULL;
 164
 165        len = qdisc_pkt_len(skb);
 166
 167        /* As sendslope is a negative number, this will decrease the
 168         * amount of q->credits.
 169         */
 170        credits = credits_from_len(len, q->sendslope, q->port_rate);
 171        credits += q->credits;
 172
 173        q->credits = max_t(s64, credits, q->locredit);
 174        q->last = now;
 175
 176        return skb;
 177}
 178
 179static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
 180{
 181        return qdisc_dequeue_head(sch);
 182}
 183
 184static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
 185{
 186        struct cbs_sched_data *q = qdisc_priv(sch);
 187
 188        return q->dequeue(sch);
 189}
 190
 191static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
 192        [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
 193};
 194
 195static void cbs_disable_offload(struct net_device *dev,
 196                                struct cbs_sched_data *q)
 197{
 198        struct tc_cbs_qopt_offload cbs = { };
 199        const struct net_device_ops *ops;
 200        int err;
 201
 202        if (!q->offload)
 203                return;
 204
 205        q->enqueue = cbs_enqueue_soft;
 206        q->dequeue = cbs_dequeue_soft;
 207
 208        ops = dev->netdev_ops;
 209        if (!__rh_has_ndo_setup_tc(dev))
 210                return;
 211
 212        cbs.queue = q->queue;
 213        cbs.enable = 0;
 214
 215        err = __rh_call_ndo_setup_tc(dev, 0, TC_SETUP_QDISC_CBS, &cbs);
 216        if (err < 0)
 217                pr_warn("Couldn't disable CBS offload for queue %d\n",
 218                        cbs.queue);
 219}
 220
 221static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
 222                              const struct tc_cbs_qopt *opt)
 223{
 224        const struct net_device_ops *ops __maybe_unused = dev->netdev_ops;
 225        struct tc_cbs_qopt_offload cbs = { };
 226        int err;
 227
 228        if (!__rh_has_ndo_setup_tc(dev))
 229                return -EOPNOTSUPP;
 230
 231        cbs.queue = q->queue;
 232
 233        cbs.enable = 1;
 234        cbs.hicredit = opt->hicredit;
 235        cbs.locredit = opt->locredit;
 236        cbs.idleslope = opt->idleslope;
 237        cbs.sendslope = opt->sendslope;
 238
 239        err = __rh_call_ndo_setup_tc(dev, 0, TC_SETUP_QDISC_CBS, &cbs);
 240        if (err < 0)
 241                return err;
 242
 243        q->enqueue = cbs_enqueue_offload;
 244        q->dequeue = cbs_dequeue_offload;
 245
 246        return 0;
 247}
 248
 249static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
 250{
 251        struct cbs_sched_data *q = qdisc_priv(sch);
 252        struct net_device *dev = qdisc_dev(sch);
 253        struct nlattr *tb[TCA_CBS_MAX + 1];
 254        struct tc_cbs_qopt *qopt;
 255        int err;
 256
 257        err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy);
 258        if (err < 0)
 259                return err;
 260
 261        if (!tb[TCA_CBS_PARMS])
 262                return -EINVAL;
 263
 264        qopt = nla_data(tb[TCA_CBS_PARMS]);
 265
 266        if (!qopt->offload) {
 267                struct ethtool_link_ksettings ecmd;
 268                s64 link_speed;
 269
 270                if (!__ethtool_get_link_ksettings(dev, &ecmd))
 271                        link_speed = ecmd.base.speed;
 272                else
 273                        link_speed = SPEED_1000;
 274
 275                q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
 276
 277                cbs_disable_offload(dev, q);
 278        } else {
 279                err = cbs_enable_offload(dev, q, qopt);
 280                if (err < 0)
 281                        return err;
 282        }
 283
 284        /* Everything went OK, save the parameters used. */
 285        q->hicredit = qopt->hicredit;
 286        q->locredit = qopt->locredit;
 287        q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
 288        q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
 289        q->offload = qopt->offload;
 290
 291        return 0;
 292}
 293
 294static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
 295{
 296        struct cbs_sched_data *q = qdisc_priv(sch);
 297        struct net_device *dev = qdisc_dev(sch);
 298
 299        if (!opt)
 300                return -EINVAL;
 301
 302        q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
 303
 304        q->enqueue = cbs_enqueue_soft;
 305        q->dequeue = cbs_dequeue_soft;
 306
 307        qdisc_watchdog_init(&q->watchdog, sch);
 308
 309        return cbs_change(sch, opt);
 310}
 311
 312static void cbs_destroy(struct Qdisc *sch)
 313{
 314        struct cbs_sched_data *q = qdisc_priv(sch);
 315        struct net_device *dev = qdisc_dev(sch);
 316
 317        /*
 318         * RHEL: don't try further uninit if qdisc_watchdog_init()
 319         * and cbs_change() have not been called.
 320         */
 321        if (!q->watchdog.qdisc)
 322                return;
 323
 324        qdisc_watchdog_cancel(&q->watchdog);
 325
 326        cbs_disable_offload(dev, q);
 327}
 328
 329static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
 330{
 331        struct cbs_sched_data *q = qdisc_priv(sch);
 332        struct tc_cbs_qopt opt = { };
 333        struct nlattr *nest;
 334
 335        nest = nla_nest_start(skb, TCA_OPTIONS);
 336        if (!nest)
 337                goto nla_put_failure;
 338
 339        opt.hicredit = q->hicredit;
 340        opt.locredit = q->locredit;
 341        opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
 342        opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
 343        opt.offload = q->offload;
 344
 345        if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
 346                goto nla_put_failure;
 347
 348        return nla_nest_end(skb, nest);
 349
 350nla_put_failure:
 351        nla_nest_cancel(skb, nest);
 352        return -1;
 353}
 354
 355static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
 356        .id             =       "cbs",
 357        .priv_size      =       sizeof(struct cbs_sched_data),
 358        .enqueue        =       cbs_enqueue,
 359        .dequeue        =       cbs_dequeue,
 360        .peek           =       qdisc_peek_dequeued,
 361        .init           =       cbs_init,
 362        .reset          =       qdisc_reset_queue,
 363        .destroy        =       cbs_destroy,
 364        .change         =       cbs_change,
 365        .dump           =       cbs_dump,
 366        .owner          =       THIS_MODULE,
 367};
 368
 369static int __init cbs_module_init(void)
 370{
 371        return register_qdisc(&cbs_qdisc_ops);
 372}
 373
 374static void __exit cbs_module_exit(void)
 375{
 376        unregister_qdisc(&cbs_qdisc_ops);
 377}
 378module_init(cbs_module_init)
 379module_exit(cbs_module_exit)
 380MODULE_LICENSE("GPL");
 381