linux/net/sched/sch_prio.c
<<
>>
Prefs
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_prio.c Simple 3-band priority "scheduler".
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 * Fixes:       19990609: J Hadi Salim <hadi@nortelnetworks.com>:
 *              Init --  EINVAL when opt undefined
 */
   9
  10#include <linux/module.h>
  11#include <linux/slab.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/string.h>
  15#include <linux/errno.h>
  16#include <linux/skbuff.h>
  17#include <net/netlink.h>
  18#include <net/pkt_sched.h>
  19#include <net/pkt_cls.h>
  20
  21struct prio_sched_data {
  22        int bands;
  23        struct tcf_proto __rcu *filter_list;
  24        struct tcf_block *block;
  25        u8  prio2band[TC_PRIO_MAX+1];
  26        struct Qdisc *queues[TCQ_PRIO_BANDS];
  27};
  28
  29
  30static struct Qdisc *
  31prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
  32{
  33        struct prio_sched_data *q = qdisc_priv(sch);
  34        u32 band = skb->priority;
  35        struct tcf_result res;
  36        struct tcf_proto *fl;
  37        int err;
  38
  39        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
  40        if (TC_H_MAJ(skb->priority) != sch->handle) {
  41                fl = rcu_dereference_bh(q->filter_list);
  42                err = tcf_classify(skb, NULL, fl, &res, false);
  43#ifdef CONFIG_NET_CLS_ACT
  44                switch (err) {
  45                case TC_ACT_STOLEN:
  46                case TC_ACT_QUEUED:
  47                case TC_ACT_TRAP:
  48                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
  49                        fallthrough;
  50                case TC_ACT_SHOT:
  51                        return NULL;
  52                }
  53#endif
  54                if (!fl || err < 0) {
  55                        if (TC_H_MAJ(band))
  56                                band = 0;
  57                        return q->queues[q->prio2band[band & TC_PRIO_MAX]];
  58                }
  59                band = res.classid;
  60        }
  61        band = TC_H_MIN(band) - 1;
  62        if (band >= q->bands)
  63                return q->queues[q->prio2band[0]];
  64
  65        return q->queues[band];
  66}
  67
  68static int
  69prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
  70{
  71        unsigned int len = qdisc_pkt_len(skb);
  72        struct Qdisc *qdisc;
  73        int ret;
  74
  75        qdisc = prio_classify(skb, sch, &ret);
  76#ifdef CONFIG_NET_CLS_ACT
  77        if (qdisc == NULL) {
  78
  79                if (ret & __NET_XMIT_BYPASS)
  80                        qdisc_qstats_drop(sch);
  81                __qdisc_drop(skb, to_free);
  82                return ret;
  83        }
  84#endif
  85
  86        ret = qdisc_enqueue(skb, qdisc, to_free);
  87        if (ret == NET_XMIT_SUCCESS) {
  88                sch->qstats.backlog += len;
  89                sch->q.qlen++;
  90                return NET_XMIT_SUCCESS;
  91        }
  92        if (net_xmit_drop_count(ret))
  93                qdisc_qstats_drop(sch);
  94        return ret;
  95}
  96
  97static struct sk_buff *prio_peek(struct Qdisc *sch)
  98{
  99        struct prio_sched_data *q = qdisc_priv(sch);
 100        int prio;
 101
 102        for (prio = 0; prio < q->bands; prio++) {
 103                struct Qdisc *qdisc = q->queues[prio];
 104                struct sk_buff *skb = qdisc->ops->peek(qdisc);
 105                if (skb)
 106                        return skb;
 107        }
 108        return NULL;
 109}
 110
 111static struct sk_buff *prio_dequeue(struct Qdisc *sch)
 112{
 113        struct prio_sched_data *q = qdisc_priv(sch);
 114        int prio;
 115
 116        for (prio = 0; prio < q->bands; prio++) {
 117                struct Qdisc *qdisc = q->queues[prio];
 118                struct sk_buff *skb = qdisc_dequeue_peeked(qdisc);
 119                if (skb) {
 120                        qdisc_bstats_update(sch, skb);
 121                        qdisc_qstats_backlog_dec(sch, skb);
 122                        sch->q.qlen--;
 123                        return skb;
 124                }
 125        }
 126        return NULL;
 127
 128}
 129
 130static void
 131prio_reset(struct Qdisc *sch)
 132{
 133        int prio;
 134        struct prio_sched_data *q = qdisc_priv(sch);
 135
 136        for (prio = 0; prio < q->bands; prio++)
 137                qdisc_reset(q->queues[prio]);
 138        sch->qstats.backlog = 0;
 139        sch->q.qlen = 0;
 140}
 141
 142static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
 143{
 144        struct net_device *dev = qdisc_dev(sch);
 145        struct tc_prio_qopt_offload opt = {
 146                .handle = sch->handle,
 147                .parent = sch->parent,
 148        };
 149
 150        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
 151                return -EOPNOTSUPP;
 152
 153        if (qopt) {
 154                opt.command = TC_PRIO_REPLACE;
 155                opt.replace_params.bands = qopt->bands;
 156                memcpy(&opt.replace_params.priomap, qopt->priomap,
 157                       TC_PRIO_MAX + 1);
 158                opt.replace_params.qstats = &sch->qstats;
 159        } else {
 160                opt.command = TC_PRIO_DESTROY;
 161        }
 162
 163        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt);
 164}
 165
 166static void
 167prio_destroy(struct Qdisc *sch)
 168{
 169        int prio;
 170        struct prio_sched_data *q = qdisc_priv(sch);
 171
 172        tcf_block_put(q->block);
 173        prio_offload(sch, NULL);
 174        for (prio = 0; prio < q->bands; prio++)
 175                qdisc_put(q->queues[prio]);
 176}
 177
 178static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
 179                     struct netlink_ext_ack *extack)
 180{
 181        struct prio_sched_data *q = qdisc_priv(sch);
 182        struct Qdisc *queues[TCQ_PRIO_BANDS];
 183        int oldbands = q->bands, i;
 184        struct tc_prio_qopt *qopt;
 185
 186        if (nla_len(opt) < sizeof(*qopt))
 187                return -EINVAL;
 188        qopt = nla_data(opt);
 189
 190        if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
 191                return -EINVAL;
 192
 193        for (i = 0; i <= TC_PRIO_MAX; i++) {
 194                if (qopt->priomap[i] >= qopt->bands)
 195                        return -EINVAL;
 196        }
 197
 198        /* Before commit, make sure we can allocate all new qdiscs */
 199        for (i = oldbands; i < qopt->bands; i++) {
 200                queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
 201                                              TC_H_MAKE(sch->handle, i + 1),
 202                                              extack);
 203                if (!queues[i]) {
 204                        while (i > oldbands)
 205                                qdisc_put(queues[--i]);
 206                        return -ENOMEM;
 207                }
 208        }
 209
 210        prio_offload(sch, qopt);
 211        sch_tree_lock(sch);
 212        q->bands = qopt->bands;
 213        memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 214
 215        for (i = q->bands; i < oldbands; i++)
 216                qdisc_tree_flush_backlog(q->queues[i]);
 217
 218        for (i = oldbands; i < q->bands; i++) {
 219                q->queues[i] = queues[i];
 220                if (q->queues[i] != &noop_qdisc)
 221                        qdisc_hash_add(q->queues[i], true);
 222        }
 223
 224        sch_tree_unlock(sch);
 225
 226        for (i = q->bands; i < oldbands; i++)
 227                qdisc_put(q->queues[i]);
 228        return 0;
 229}
 230
 231static int prio_init(struct Qdisc *sch, struct nlattr *opt,
 232                     struct netlink_ext_ack *extack)
 233{
 234        struct prio_sched_data *q = qdisc_priv(sch);
 235        int err;
 236
 237        if (!opt)
 238                return -EINVAL;
 239
 240        err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
 241        if (err)
 242                return err;
 243
 244        return prio_tune(sch, opt, extack);
 245}
 246
 247static int prio_dump_offload(struct Qdisc *sch)
 248{
 249        struct tc_prio_qopt_offload hw_stats = {
 250                .command = TC_PRIO_STATS,
 251                .handle = sch->handle,
 252                .parent = sch->parent,
 253                {
 254                        .stats = {
 255                                .bstats = &sch->bstats,
 256                                .qstats = &sch->qstats,
 257                        },
 258                },
 259        };
 260
 261        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_PRIO, &hw_stats);
 262}
 263
 264static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 265{
 266        struct prio_sched_data *q = qdisc_priv(sch);
 267        unsigned char *b = skb_tail_pointer(skb);
 268        struct tc_prio_qopt opt;
 269        int err;
 270
 271        opt.bands = q->bands;
 272        memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
 273
 274        err = prio_dump_offload(sch);
 275        if (err)
 276                goto nla_put_failure;
 277
 278        if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 279                goto nla_put_failure;
 280
 281        return skb->len;
 282
 283nla_put_failure:
 284        nlmsg_trim(skb, b);
 285        return -1;
 286}
 287
 288static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 289                      struct Qdisc **old, struct netlink_ext_ack *extack)
 290{
 291        struct prio_sched_data *q = qdisc_priv(sch);
 292        struct tc_prio_qopt_offload graft_offload;
 293        unsigned long band = arg - 1;
 294
 295        if (!new) {
 296                new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
 297                                        TC_H_MAKE(sch->handle, arg), extack);
 298                if (!new)
 299                        new = &noop_qdisc;
 300                else
 301                        qdisc_hash_add(new, true);
 302        }
 303
 304        *old = qdisc_replace(sch, new, &q->queues[band]);
 305
 306        graft_offload.handle = sch->handle;
 307        graft_offload.parent = sch->parent;
 308        graft_offload.graft_params.band = band;
 309        graft_offload.graft_params.child_handle = new->handle;
 310        graft_offload.command = TC_PRIO_GRAFT;
 311
 312        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
 313                                   TC_SETUP_QDISC_PRIO, &graft_offload,
 314                                   extack);
 315        return 0;
 316}
 317
 318static struct Qdisc *
 319prio_leaf(struct Qdisc *sch, unsigned long arg)
 320{
 321        struct prio_sched_data *q = qdisc_priv(sch);
 322        unsigned long band = arg - 1;
 323
 324        return q->queues[band];
 325}
 326
 327static unsigned long prio_find(struct Qdisc *sch, u32 classid)
 328{
 329        struct prio_sched_data *q = qdisc_priv(sch);
 330        unsigned long band = TC_H_MIN(classid);
 331
 332        if (band - 1 >= q->bands)
 333                return 0;
 334        return band;
 335}
 336
 337static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
 338{
 339        return prio_find(sch, classid);
 340}
 341
 342
 343static void prio_unbind(struct Qdisc *q, unsigned long cl)
 344{
 345}
 346
 347static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
 348                           struct tcmsg *tcm)
 349{
 350        struct prio_sched_data *q = qdisc_priv(sch);
 351
 352        tcm->tcm_handle |= TC_H_MIN(cl);
 353        tcm->tcm_info = q->queues[cl-1]->handle;
 354        return 0;
 355}
 356
 357static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 358                                 struct gnet_dump *d)
 359{
 360        struct prio_sched_data *q = qdisc_priv(sch);
 361        struct Qdisc *cl_q;
 362
 363        cl_q = q->queues[cl - 1];
 364        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 365                                  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
 366            qdisc_qstats_copy(d, cl_q) < 0)
 367                return -1;
 368
 369        return 0;
 370}
 371
 372static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 373{
 374        struct prio_sched_data *q = qdisc_priv(sch);
 375        int prio;
 376
 377        if (arg->stop)
 378                return;
 379
 380        for (prio = 0; prio < q->bands; prio++) {
 381                if (arg->count < arg->skip) {
 382                        arg->count++;
 383                        continue;
 384                }
 385                if (arg->fn(sch, prio + 1, arg) < 0) {
 386                        arg->stop = 1;
 387                        break;
 388                }
 389                arg->count++;
 390        }
 391}
 392
 393static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl,
 394                                        struct netlink_ext_ack *extack)
 395{
 396        struct prio_sched_data *q = qdisc_priv(sch);
 397
 398        if (cl)
 399                return NULL;
 400        return q->block;
 401}
 402
 403static const struct Qdisc_class_ops prio_class_ops = {
 404        .graft          =       prio_graft,
 405        .leaf           =       prio_leaf,
 406        .find           =       prio_find,
 407        .walk           =       prio_walk,
 408        .tcf_block      =       prio_tcf_block,
 409        .bind_tcf       =       prio_bind,
 410        .unbind_tcf     =       prio_unbind,
 411        .dump           =       prio_dump_class,
 412        .dump_stats     =       prio_dump_class_stats,
 413};
 414
 415static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 416        .next           =       NULL,
 417        .cl_ops         =       &prio_class_ops,
 418        .id             =       "prio",
 419        .priv_size      =       sizeof(struct prio_sched_data),
 420        .enqueue        =       prio_enqueue,
 421        .dequeue        =       prio_dequeue,
 422        .peek           =       prio_peek,
 423        .init           =       prio_init,
 424        .reset          =       prio_reset,
 425        .destroy        =       prio_destroy,
 426        .change         =       prio_tune,
 427        .dump           =       prio_dump,
 428        .owner          =       THIS_MODULE,
 429};
 430
 431static int __init prio_module_init(void)
 432{
 433        return register_qdisc(&prio_qdisc_ops);
 434}
 435
 436static void __exit prio_module_exit(void)
 437{
 438        unregister_qdisc(&prio_qdisc_ops);
 439}
 440
 441module_init(prio_module_init)
 442module_exit(prio_module_exit)
 443
 444MODULE_LICENSE("GPL");
 445