// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:     Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *              Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */
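
/* Typical setup from userspace (illustrative only; see tc-etf(8)):
 *
 *      tc qdisc replace dev eth0 parent 100:1 etf \
 *              clockid CLOCK_TAI delta 300000 offload
 *
 * delta is given in ns; offload requires driver support via
 * TC_SETUP_QDISC_ETF (see etf_enable_offload() below).
 */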

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/rbtree.h>
#include <linux/skbuff.h>
#include <linux/posix-timers.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)

struct etf_sched_data {
        bool offload;
        bool deadline_mode;
        bool skip_sock_check;
        int clockid;
        int queue;
        s32 delta; /* fudge factor, in ns: how early a packet may leave */
        ktime_t last; /* The txtime of the last skb sent to the netdevice. */
        struct rb_root_cached head;
        struct qdisc_watchdog watchdog;
        ktime_t (*get_time)(void);
};

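/* TCA_ETF_PARMS carries a struct tc_etf_qopt, defined in
 * include/uapi/linux/pkt_sched.h.
 */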
static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
        [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
};

static inline int validate_input_params(struct tc_etf_qopt *qopt,
                                        struct netlink_ext_ack *extack)
{
        /* Check that the params comply with the following rules:
         *      * Clockid and delta must be valid.
         *
         *      * Dynamic clockids are not supported.
         *
         *      * Delta must be a positive integer.
         *
         * Also note that for the HW offload case, we must
         * expect that system clocks have been synchronized to PHC.
         */
        if (qopt->clockid < 0) {
                NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
                return -ENOTSUPP;
        }

        if (qopt->clockid != CLOCK_TAI) {
                NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
                return -EINVAL;
        }

        if (qopt->delta < 0) {
                NL_SET_ERR_MSG(extack, "Delta must be positive");
                return -EINVAL;
        }

        return 0;
}

static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        ktime_t txtime = nskb->tstamp;
        struct sock *sk = nskb->sk;
        ktime_t now;

        if (q->skip_sock_check)
                goto skip;

        if (!sk || !sk_fullsock(sk))
                return false;

        if (!sock_flag(sk, SOCK_TXTIME))
                return false;

        /* We don't perform crosstimestamping.
         * Drop if packet's clockid differs from qdisc's.
         */
        if (sk->sk_clockid != q->clockid)
                return false;

        if (sk->sk_txtime_deadline_mode != q->deadline_mode)
                return false;

skip:
        now = q->get_time();
        if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
                return false;

        return true;
}
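
/* Userspace counterpart (sketch, not part of this file): for the checks
 * above to pass, a sender is expected to enable SO_TXTIME with a matching
 * clockid and attach a per-packet transmit time, roughly:
 *
 *      struct sock_txtime st = { .clockid = CLOCK_TAI, .flags = 0 };
 *
 *      setsockopt(fd, SOL_SOCKET, SO_TXTIME, &st, sizeof(st));
 *      // then, for each sendmsg(), add a SOL_SOCKET/SCM_TXTIME cmsg
 *      // carrying a u64 transmit time in ns.
 */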

static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct rb_node *p;

        p = rb_first_cached(&q->head);
        if (!p)
                return NULL;

        return rb_to_skb(p);
}

static void reset_watchdog(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = etf_peek_timesortedlist(sch);
        ktime_t next;

        if (!skb) {
                qdisc_watchdog_cancel(&q->watchdog);
                return;
        }

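        /* Fire delta ns ahead of the earliest txtime, so the skb can be
         * handed to the device in time.
         */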
        next = ktime_sub_ns(skb->tstamp, q->delta);
        qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
}

static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
{
        struct sock_exterr_skb *serr;
        struct sk_buff *clone;
        ktime_t txtime = skb->tstamp;
        struct sock *sk = skb->sk;

        if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
                return;

        clone = skb_clone(skb, GFP_ATOMIC);
        if (!clone)
                return;

        serr = SKB_EXT_ERR(clone);
        serr->ee.ee_errno = err;
        serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
        serr->ee.ee_type = 0;
        serr->ee.ee_code = code;
        serr->ee.ee_pad = 0;
        serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
        serr->ee.ee_info = txtime; /* low part of tstamp */

        if (sock_queue_err_skb(sk, clone))
                kfree_skb(clone);
}
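
/* Retrieval side (sketch, assuming standard MSG_ERRQUEUE handling): the
 * sender reads the error with recvmsg(fd, &msg, MSG_ERRQUEUE) and walks
 * the cmsgs for a struct sock_extended_err whose ee_origin is
 * SO_EE_ORIGIN_TXTIME; ee_data/ee_info hold the high/low 32 bits of the
 * txtime that was rejected or missed.
 */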

static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
                                      struct sk_buff **to_free)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
        ktime_t txtime = nskb->tstamp;
        bool leftmost = true;

        if (!is_packet_valid(sch, nskb)) {
                report_sock_error(nskb, EINVAL,
                                  SO_EE_CODE_TXTIME_INVALID_PARAM);
                return qdisc_drop(nskb, sch, to_free);
        }

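        /* Descend to the insertion point, ordering by txtime; using >=
         * sends equal txtimes to the right, which keeps FIFO order among
         * packets carrying the same txtime.
         */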
        while (*p) {
                struct sk_buff *skb;

                parent = *p;
                skb = rb_to_skb(parent);
                if (ktime_compare(txtime, skb->tstamp) >= 0) {
                        p = &parent->rb_right;
                        leftmost = false;
                } else {
                        p = &parent->rb_left;
                }
        }
        rb_link_node(&nskb->rbnode, parent, p);
        rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);

        qdisc_qstats_backlog_inc(sch, nskb);
        sch->q.qlen++;

        /* Now we may need to re-arm the qdisc watchdog for the next packet. */
        reset_watchdog(sch);

        return NET_XMIT_SUCCESS;
}

static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
                                ktime_t now)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *to_free = NULL;
        struct sk_buff *tmp = NULL;

        skb_rbtree_walk_from_safe(skb, tmp) {
                if (ktime_after(skb->tstamp, now))
                        break;

                rb_erase_cached(&skb->rbnode, &q->head);

                /* The rbnode field in the skb re-uses these fields; now
                 * that we are done with the rbnode, reset them.
                 */
                skb->next = NULL;
                skb->prev = NULL;
                skb->dev = qdisc_dev(sch);

                report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);

                qdisc_qstats_backlog_dec(sch, skb);
                qdisc_drop(skb, sch, &to_free);
                qdisc_qstats_overlimit(sch);
                sch->q.qlen--;
        }

        kfree_skb_list(to_free);
}

static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
{
        struct etf_sched_data *q = qdisc_priv(sch);

        rb_erase_cached(&skb->rbnode, &q->head);

        /* The rbnode field in the skb re-uses these fields; now that
         * we are done with the rbnode, reset them.
         */
        skb->next = NULL;
        skb->prev = NULL;
        skb->dev = qdisc_dev(sch);

        qdisc_qstats_backlog_dec(sch, skb);

        qdisc_bstats_update(sch, skb);

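        /* Remember the txtime handed to the device; is_packet_valid()
         * rejects any new packet scheduled before it.
         */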
        q->last = skb->tstamp;

        sch->q.qlen--;
}

static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        ktime_t now, next;

        skb = etf_peek_timesortedlist(sch);
        if (!skb)
                return NULL;

        now = q->get_time();

        /* Drop if packet has expired while in queue. */
        if (ktime_before(skb->tstamp, now)) {
                timesortedlist_drop(sch, skb, now);
                skb = NULL;
                goto out;
        }

        /* When in deadline mode, dequeue as soon as possible and change
         * the txtime from deadline to now, so the packet is sent
         * immediately.
         */
        if (q->deadline_mode) {
                timesortedlist_remove(sch, skb);
                skb->tstamp = now;
                goto out;
        }

        next = ktime_sub_ns(skb->tstamp, q->delta);

        /* Dequeue only if now is within the [txtime - delta, txtime] range. */
        if (ktime_after(now, next))
                timesortedlist_remove(sch, skb);
        else
                skb = NULL;

out:
        /* Now we may need to re-arm the qdisc watchdog for the next packet. */
        reset_watchdog(sch);

        return skb;
}

static void etf_disable_offload(struct net_device *dev,
                                struct etf_sched_data *q)
{
        struct tc_etf_qopt_offload etf = { };
        const struct net_device_ops *ops;
        int err;

        if (!q->offload)
                return;

        ops = dev->netdev_ops;
        if (!ops->ndo_setup_tc)
                return;

        etf.queue = q->queue;
        etf.enable = 0;

        err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
        if (err < 0)
                pr_warn("Couldn't disable ETF offload for queue %d\n",
                        etf.queue);
}

static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
                              struct netlink_ext_ack *extack)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        struct tc_etf_qopt_offload etf = { };
        int err;

        if (q->offload)
                return 0;

        if (!ops->ndo_setup_tc) {
                NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
                return -EOPNOTSUPP;
        }

        etf.queue = q->queue;
        etf.enable = 1;

        err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
                return err;
        }

        return 0;
}
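
/* Driver side (sketch with hypothetical foo_* names): a device opts in
 * by handling TC_SETUP_QDISC_ETF in its .ndo_setup_tc callback, roughly:
 *
 *      static int foo_setup_tc(struct net_device *dev,
 *                              enum tc_setup_type type, void *type_data)
 *      {
 *              if (type == TC_SETUP_QDISC_ETF)
 *                      return foo_offload_txtime(netdev_priv(dev),
 *                                                type_data);
 *              return -EOPNOTSUPP;
 *      }
 *
 * In-tree, igb (i210 LaunchTime) is one example of such an implementation.
 */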

static int etf_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct nlattr *tb[TCA_ETF_MAX + 1];
        struct tc_etf_qopt *qopt;
        int err;

        if (!opt) {
                NL_SET_ERR_MSG(extack,
                               "Missing ETF qdisc options which are mandatory");
                return -EINVAL;
        }

        err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
                                          extack);
        if (err < 0)
                return err;

        if (!tb[TCA_ETF_PARMS]) {
                NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
                return -EINVAL;
        }

        qopt = nla_data(tb[TCA_ETF_PARMS]);

        pr_debug("delta %d clockid %d offload %s deadline %s\n",
                 qopt->delta, qopt->clockid,
                 OFFLOAD_IS_ON(qopt) ? "on" : "off",
                 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");

        err = validate_input_params(qopt, extack);
        if (err < 0)
                return err;

        q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

        if (OFFLOAD_IS_ON(qopt)) {
                err = etf_enable_offload(dev, q, extack);
                if (err < 0)
                        return err;
        }

        /* Everything went OK, save the parameters used. */
        q->delta = qopt->delta;
        q->clockid = qopt->clockid;
        q->offload = OFFLOAD_IS_ON(qopt);
        q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
        q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);

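        /* Note: validate_input_params() currently accepts only CLOCK_TAI,
         * so the remaining cases below are defensive.
         */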
        switch (q->clockid) {
        case CLOCK_REALTIME:
                q->get_time = ktime_get_real;
                break;
        case CLOCK_MONOTONIC:
                q->get_time = ktime_get;
                break;
        case CLOCK_BOOTTIME:
                q->get_time = ktime_get_boottime;
                break;
        case CLOCK_TAI:
                q->get_time = ktime_get_clocktai;
                break;
        default:
                NL_SET_ERR_MSG(extack, "Clockid is not supported");
                return -ENOTSUPP;
        }

        qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);

        return 0;
}

static void timesortedlist_clear(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct rb_node *p = rb_first_cached(&q->head);

        while (p) {
                struct sk_buff *skb = rb_to_skb(p);

                p = rb_next(p);

                rb_erase_cached(&skb->rbnode, &q->head);
                rtnl_kfree_skbs(skb, skb);
                sch->q.qlen--;
        }
}

static void etf_reset(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);

        /* Only cancel watchdog if it's been initialized. */
        if (q->watchdog.qdisc == sch)
                qdisc_watchdog_cancel(&q->watchdog);

        /* No matter which mode we are on, it's safe to clear both the
         * time-sorted rbtree and sch->q.
         */
        timesortedlist_clear(sch);
        __qdisc_reset_queue(&sch->q);

        sch->qstats.backlog = 0;
        sch->q.qlen = 0;

        q->last = 0;
}

static void etf_destroy(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);

        /* Only cancel watchdog if it's been initialized. */
        if (q->watchdog.qdisc == sch)
                qdisc_watchdog_cancel(&q->watchdog);

        etf_disable_offload(dev, q);
}

static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct tc_etf_qopt opt = { };
        struct nlattr *nest;

        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (!nest)
                goto nla_put_failure;

        opt.delta = q->delta;
        opt.clockid = q->clockid;
        if (q->offload)
                opt.flags |= TC_ETF_OFFLOAD_ON;

        if (q->deadline_mode)
                opt.flags |= TC_ETF_DEADLINE_MODE_ON;

        if (q->skip_sock_check)
                opt.flags |= TC_ETF_SKIP_SOCK_CHECK;

        if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
        .id             =       "etf",
        .priv_size      =       sizeof(struct etf_sched_data),
        .enqueue        =       etf_enqueue_timesortedlist,
        .dequeue        =       etf_dequeue_timesortedlist,
        .peek           =       etf_peek_timesortedlist,
        .init           =       etf_init,
        .reset          =       etf_reset,
        .destroy        =       etf_destroy,
        .dump           =       etf_dump,
        .owner          =       THIS_MODULE,
};

static int __init etf_module_init(void)
{
        return register_qdisc(&etf_qdisc_ops);
}

static void __exit etf_module_exit(void)
{
        unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");