// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/rbtree.h>
#include <linux/skbuff.h>
#include <linux/posix-timers.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
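
/* A typical configuration, per tc-etf(8) (illustrative only: the device,
 * parent handle and delta value are placeholders, and the exact syntax
 * may vary across iproute2 versions):
 *
 *	tc qdisc replace dev eth0 parent 100:1 etf \
 *		clockid CLOCK_TAI delta 300000 offload
 */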

struct etf_sched_data {
	bool offload;
	bool deadline_mode;
	bool skip_sock_check;
	int clockid;
	int queue;
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	struct rb_root_cached head;
	struct qdisc_watchdog watchdog;
	ktime_t (*get_time)(void);
};

static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
};

static inline int validate_input_params(struct tc_etf_qopt *qopt,
					struct netlink_ext_ack *extack)
{
	/* Check if params comply with the following rules:
	 *	* Clockid and delta must be valid.
	 *
	 *	* Dynamic clockids are not supported.
	 *
	 *	* Delta must be a non-negative integer.
	 *
	 * Also note that for the HW offload case, we must
	 * expect that system clocks have been synchronized to PHC.
	 */
	if (qopt->clockid < 0) {
		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
		return -ENOTSUPP;
	}

	if (qopt->clockid != CLOCK_TAI) {
		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
		return -EINVAL;
	}

	if (qopt->delta < 0) {
		NL_SET_ERR_MSG(extack, "Delta must be positive");
		return -EINVAL;
	}

	return 0;
}

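/* Validate an skb before it is queued.  Unless skip_sock_check is set,
 * the packet must originate from a socket with SO_TXTIME enabled whose
 * clockid and deadline mode match the qdisc's.  In all cases, the
 * txtime must be neither in the past nor before the txtime of the last
 * packet already sent to the device.
 *
 * A minimal user-space sketch of the socket setup this check expects
 * (illustrative; "fd" is a placeholder and error handling is omitted):
 *
 *	struct sock_txtime sk_txtime = {
 *		.clockid = CLOCK_TAI,
 *		.flags = SOF_TXTIME_REPORT_ERRORS,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_TXTIME, &sk_txtime, sizeof(sk_txtime));
 *
 * with each packet's txtime then passed as an SCM_TXTIME cmsg (a __u64
 * in nanoseconds) on sendmsg().
 */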
static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	ktime_t txtime = nskb->tstamp;
	struct sock *sk = nskb->sk;
	ktime_t now;

	if (q->skip_sock_check)
		goto skip;

	if (!sk)
		return false;

	if (!sock_flag(sk, SOCK_TXTIME))
		return false;

	/* We don't perform crosstimestamping.
	 * Drop if packet's clockid differs from qdisc's.
	 */
	if (sk->sk_clockid != q->clockid)
		return false;

	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
		return false;

skip:
	now = q->get_time();
	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
		return false;

	return true;
}

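/* Return the skb with the earliest txtime, i.e. the cached leftmost
 * node of the rbtree, without dequeuing it.  Returns NULL if the qdisc
 * is empty.
 */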
static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p;

	p = rb_first_cached(&q->head);
	if (!p)
		return NULL;

	return rb_to_skb(p);
}

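/* Re-arm the qdisc watchdog to fire 'delta' ns ahead of the earliest
 * pending txtime, giving the packet time to reach the device.  If the
 * queue is empty, cancel the watchdog instead.
 */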
static void reset_watchdog(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = etf_peek_timesortedlist(sch);
	ktime_t next;

	if (!skb) {
		qdisc_watchdog_cancel(&q->watchdog);
		return;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);
	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
}

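/* Queue a txtime error on the owning socket's error queue, provided the
 * socket opted in with SOF_TXTIME_REPORT_ERRORS.  The offending txtime
 * is split across ee_data/ee_info, so user space reading MSG_ERRQUEUE
 * can reconstruct it as ((__u64)ee_data << 32) | ee_info.
 */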
static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *clone;
	ktime_t txtime = skb->tstamp;

	if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
		return;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone)
		return;

	serr = SKB_EXT_ERR(clone);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
	serr->ee.ee_info = txtime; /* low part of tstamp */

	if (sock_queue_err_skb(skb->sk, clone))
		kfree_skb(clone);
}

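/* Enqueue ordered by txtime: walk the rbtree down to a leaf, going
 * right when the new txtime is greater than or equal to a node's (so
 * packets with equal txtimes keep their enqueue order) and left
 * otherwise.  Invalid packets are dropped, with the error reported to
 * the socket when possible.
 */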
static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
				      struct sk_buff **to_free)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
	ktime_t txtime = nskb->tstamp;
	bool leftmost = true;

	if (!is_packet_valid(sch, nskb)) {
		report_sock_error(nskb, EINVAL,
				  SO_EE_CODE_TXTIME_INVALID_PARAM);
		return qdisc_drop(nskb, sch, to_free);
	}

	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = rb_to_skb(parent);
		if (ktime_compare(txtime, skb->tstamp) >= 0) {
			p = &parent->rb_right;
			leftmost = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);

	qdisc_qstats_backlog_inc(sch, nskb);
	sch->q.qlen++;

	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return NET_XMIT_SUCCESS;
}

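/* Starting from 'skb', drop every packet in the rbtree whose txtime is
 * not after 'now', reporting each missed txtime to its socket and
 * accounting the drops as overlimits.
 */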
static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
				ktime_t now)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *to_free = NULL;
	struct sk_buff *tmp = NULL;

	skb_rbtree_walk_from_safe(skb, tmp) {
		if (ktime_after(skb->tstamp, now))
			break;

		rb_erase_cached(&skb->rbnode, &q->head);

		/* The rbnode field in the skb re-uses these fields; now
		 * that we are done with the rbnode, reset them.
		 */
		skb->next = NULL;
		skb->prev = NULL;
		skb->dev = qdisc_dev(sch);

		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);

		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, &to_free);
		qdisc_qstats_overlimit(sch);
		sch->q.qlen--;
	}

	kfree_skb_list(to_free);
}

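/* Unlink one skb from the rbtree for transmission, updating backlog and
 * byte/packet stats and recording its txtime in q->last as the lower
 * bound for future enqueues.
 */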
static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	rb_erase_cached(&skb->rbnode, &q->head);

	/* The rbnode field in the skb re-uses these fields; now that we
	 * are done with the rbnode, reset them.
	 */
	skb->next = NULL;
	skb->prev = NULL;
	skb->dev = qdisc_dev(sch);

	qdisc_qstats_backlog_dec(sch, skb);

	qdisc_bstats_update(sch, skb);

	q->last = skb->tstamp;

	sch->q.qlen--;
}

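/* Dequeue the packet with the earliest txtime.  Expired packets are
 * dropped.  In deadline mode the earliest packet is released right
 * away, with its txtime rewritten to now; otherwise a packet is
 * released only once 'now' falls within 'delta' ns of its txtime.
 */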
static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	ktime_t now, next;

	skb = etf_peek_timesortedlist(sch);
	if (!skb)
		return NULL;

	now = q->get_time();

	/* Drop if packet has expired while in queue. */
	if (ktime_before(skb->tstamp, now)) {
		timesortedlist_drop(sch, skb, now);
		skb = NULL;
		goto out;
	}

	/* When in deadline mode, dequeue as soon as possible and change
	 * the txtime from deadline to now.
	 */
	if (q->deadline_mode) {
		timesortedlist_remove(sch, skb);
		skb->tstamp = now;
		goto out;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);

	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
	if (ktime_after(now, next))
		timesortedlist_remove(sch, skb);
	else
		skb = NULL;

out:
	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return skb;
}

static void etf_disable_offload(struct net_device *dev,
				struct etf_sched_data *q)
{
	struct tc_etf_qopt_offload etf = { };
	const struct net_device_ops *ops;
	int err;

	if (!q->offload)
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_setup_tc)
		return;

	etf.queue = q->queue;
	etf.enable = 0;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0)
		pr_warn("Couldn't disable ETF offload for queue %d\n",
			etf.queue);
}

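/* Ask the driver to enable ETF offload for this tx queue through
 * ndo_setup_tc(TC_SETUP_QDISC_ETF); fails with -EOPNOTSUPP when the
 * device provides no ndo_setup_tc hook.
 */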
static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
			      struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_etf_qopt_offload etf = { };
	int err;

	if (q->offload)
		return 0;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
		return -EOPNOTSUPP;
	}

	etf.queue = q->queue;
	etf.enable = 1;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
		return err;
	}

	return 0;
}

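/* Parse and validate the netlink configuration, enable hardware offload
 * when requested, resolve the configured clockid to a ktime getter and
 * initialize the qdisc watchdog on that clock.
 */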
static int etf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct nlattr *tb[TCA_ETF_MAX + 1];
	struct tc_etf_qopt *qopt;
	int err;

	if (!opt) {
		NL_SET_ERR_MSG(extack,
			       "Missing ETF qdisc options which are mandatory");
		return -EINVAL;
	}

	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
					  extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETF_PARMS]) {
		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
		return -EINVAL;
	}

	qopt = nla_data(tb[TCA_ETF_PARMS]);

	pr_debug("delta %d clockid %d offload %s deadline %s\n",
		 qopt->delta, qopt->clockid,
		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");

	err = validate_input_params(qopt, extack);
	if (err < 0)
		return err;

	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

	if (OFFLOAD_IS_ON(qopt)) {
		err = etf_enable_offload(dev, q, extack);
		if (err < 0)
			return err;
	}

	/* Everything went OK, save the parameters used. */
	q->delta = qopt->delta;
	q->clockid = qopt->clockid;
	q->offload = OFFLOAD_IS_ON(qopt);
	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
	q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);

	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		NL_SET_ERR_MSG(extack, "Clockid is not supported");
		return -ENOTSUPP;
	}

	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);

	return 0;
}

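/* Free every skb still queued in the rbtree.  rtnl_kfree_skbs() defers
 * the actual freeing until the RTNL lock is released.
 */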
static void timesortedlist_clear(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first_cached(&q->head);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);

		rb_erase_cached(&skb->rbnode, &q->head);
		rtnl_kfree_skbs(skb, skb);
		sch->q.qlen--;
	}
}

static void etf_reset(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	/* No matter which mode we are on, it's safe to clear both lists. */
	timesortedlist_clear(sch);
	__qdisc_reset_queue(&sch->q);

	sch->qstats.backlog = 0;
	sch->q.qlen = 0;

	q->last = 0;
}

static void etf_destroy(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	etf_disable_offload(dev, q);
}

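/* Fill a netlink message with the current configuration (delta, clockid
 * and mode flags) for dumping, e.g. by 'tc qdisc show'.
 */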
static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct tc_etf_qopt opt = { };
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	opt.delta = q->delta;
	opt.clockid = q->clockid;
	if (q->offload)
		opt.flags |= TC_ETF_OFFLOAD_ON;

	if (q->deadline_mode)
		opt.flags |= TC_ETF_DEADLINE_MODE_ON;

	if (q->skip_sock_check)
		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;

	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
	.id		=	"etf",
	.priv_size	=	sizeof(struct etf_sched_data),
	.enqueue	=	etf_enqueue_timesortedlist,
	.dequeue	=	etf_dequeue_timesortedlist,
	.peek		=	etf_peek_timesortedlist,
	.init		=	etf_init,
	.reset		=	etf_reset,
	.destroy	=	etf_destroy,
	.dump		=	etf_dump,
	.owner		=	THIS_MODULE,
};

static int __init etf_module_init(void)
{
	return register_qdisc(&etf_qdisc_ops);
}

static void __exit etf_module_exit(void)
{
	unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");