linux/net/sched/sch_mqprio.c
<<
>>
Prefs
   1/*
   2 * net/sched/sch_mqprio.c
   3 *
   4 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * version 2 as published by the Free Software Foundation.
   9 */
  10
  11#include <linux/types.h>
  12#include <linux/slab.h>
  13#include <linux/kernel.h>
  14#include <linux/string.h>
  15#include <linux/errno.h>
  16#include <linux/skbuff.h>
  17#include <linux/module.h>
  18#include <net/netlink.h>
  19#include <net/pkt_sched.h>
  20#include <net/sch_generic.h>
  21
  22struct mqprio_sched {
  23        struct Qdisc            **qdiscs;
  24        int hw_owned;
  25};
  26
  27static void mqprio_destroy(struct Qdisc *sch)
  28{
  29        struct net_device *dev = qdisc_dev(sch);
  30        struct mqprio_sched *priv = qdisc_priv(sch);
  31        struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
  32        unsigned int ntx;
  33
  34        if (priv->qdiscs) {
  35                for (ntx = 0;
  36                     ntx < dev->num_tx_queues && priv->qdiscs[ntx];
  37                     ntx++)
  38                        qdisc_destroy(priv->qdiscs[ntx]);
  39                kfree(priv->qdiscs);
  40        }
  41
  42        if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
  43                dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
  44        else
  45                netdev_set_num_tc(dev, 0);
  46}
  47
  48static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
  49{
  50        int i, j;
  51
  52        /* Verify num_tc is not out of max range */
  53        if (qopt->num_tc > TC_MAX_QUEUE)
  54                return -EINVAL;
  55
  56        /* Verify priority mapping uses valid tcs */
  57        for (i = 0; i < TC_BITMASK + 1; i++) {
  58                if (qopt->prio_tc_map[i] >= qopt->num_tc)
  59                        return -EINVAL;
  60        }
  61
  62        /* net_device does not support requested operation */
  63        if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
  64                return -EINVAL;
  65
  66        /* if hw owned qcount and qoffset are taken from LLD so
  67         * no reason to verify them here
  68         */
  69        if (qopt->hw)
  70                return 0;
  71
  72        for (i = 0; i < qopt->num_tc; i++) {
  73                unsigned int last = qopt->offset[i] + qopt->count[i];
  74
  75                /* Verify the queue count is in tx range being equal to the
  76                 * real_num_tx_queues indicates the last queue is in use.
  77                 */
  78                if (qopt->offset[i] >= dev->real_num_tx_queues ||
  79                    !qopt->count[i] ||
  80                    last > dev->real_num_tx_queues)
  81                        return -EINVAL;
  82
  83                /* Verify that the offset and counts do not overlap */
  84                for (j = i + 1; j < qopt->num_tc; j++) {
  85                        if (last > qopt->offset[j])
  86                                return -EINVAL;
  87                }
  88        }
  89
  90        return 0;
  91}
  92
  93static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
  94{
  95        struct net_device *dev = qdisc_dev(sch);
  96        struct mqprio_sched *priv = qdisc_priv(sch);
  97        struct netdev_queue *dev_queue;
  98        struct Qdisc *qdisc;
  99        int i, err = -EOPNOTSUPP;
 100        struct tc_mqprio_qopt *qopt = NULL;
 101
 102        BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
 103        BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
 104
 105        if (sch->parent != TC_H_ROOT)
 106                return -EOPNOTSUPP;
 107
 108        if (!netif_is_multiqueue(dev))
 109                return -EOPNOTSUPP;
 110
 111        if (!opt || nla_len(opt) < sizeof(*qopt))
 112                return -EINVAL;
 113
 114        qopt = nla_data(opt);
 115        if (mqprio_parse_opt(dev, qopt))
 116                return -EINVAL;
 117
 118        /* pre-allocate qdisc, attachment can't fail */
 119        priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
 120                               GFP_KERNEL);
 121        if (priv->qdiscs == NULL) {
 122                err = -ENOMEM;
 123                goto err;
 124        }
 125
 126        for (i = 0; i < dev->num_tx_queues; i++) {
 127                dev_queue = netdev_get_tx_queue(dev, i);
 128                qdisc = qdisc_create_dflt(dev_queue,
 129                                          get_default_qdisc_ops(dev, i),
 130                                          TC_H_MAKE(TC_H_MAJ(sch->handle),
 131                                                    TC_H_MIN(i + 1)));
 132                if (qdisc == NULL) {
 133                        err = -ENOMEM;
 134                        goto err;
 135                }
 136                priv->qdiscs[i] = qdisc;
 137                qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 138        }
 139
 140        /* If the mqprio options indicate that hardware should own
 141         * the queue mapping then run ndo_setup_tc otherwise use the
 142         * supplied and verified mapping
 143         */
 144        if (qopt->hw) {
 145                struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
 146                                          { .tc = qopt->num_tc }};
 147
 148                priv->hw_owned = 1;
 149                err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
 150                if (err)
 151                        goto err;
 152        } else {
 153                netdev_set_num_tc(dev, qopt->num_tc);
 154                for (i = 0; i < qopt->num_tc; i++)
 155                        netdev_set_tc_queue(dev, i,
 156                                            qopt->count[i], qopt->offset[i]);
 157        }
 158
 159        /* Always use supplied priority mappings */
 160        for (i = 0; i < TC_BITMASK + 1; i++)
 161                netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
 162
 163        sch->flags |= TCQ_F_MQROOT;
 164        return 0;
 165
 166err:
 167        mqprio_destroy(sch);
 168        return err;
 169}
 170
 171static void mqprio_attach(struct Qdisc *sch)
 172{
 173        struct net_device *dev = qdisc_dev(sch);
 174        struct mqprio_sched *priv = qdisc_priv(sch);
 175        struct Qdisc *qdisc, *old;
 176        unsigned int ntx;
 177
 178        /* Attach underlying qdisc */
 179        for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
 180                qdisc = priv->qdiscs[ntx];
 181                old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
 182                if (old)
 183                        qdisc_destroy(old);
 184                if (ntx < dev->real_num_tx_queues)
 185                        qdisc_list_add(qdisc);
 186        }
 187        kfree(priv->qdiscs);
 188        priv->qdiscs = NULL;
 189}
 190
 191static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
 192                                             unsigned long cl)
 193{
 194        struct net_device *dev = qdisc_dev(sch);
 195        unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
 196
 197        if (ntx >= dev->num_tx_queues)
 198                return NULL;
 199        return netdev_get_tx_queue(dev, ntx);
 200}
 201
 202static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 203                    struct Qdisc **old)
 204{
 205        struct net_device *dev = qdisc_dev(sch);
 206        struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 207
 208        if (!dev_queue)
 209                return -EINVAL;
 210
 211        if (dev->flags & IFF_UP)
 212                dev_deactivate(dev);
 213
 214        *old = dev_graft_qdisc(dev_queue, new);
 215
 216        if (new)
 217                new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 218
 219        if (dev->flags & IFF_UP)
 220                dev_activate(dev);
 221
 222        return 0;
 223}
 224
 225static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 226{
 227        struct net_device *dev = qdisc_dev(sch);
 228        struct mqprio_sched *priv = qdisc_priv(sch);
 229        unsigned char *b = skb_tail_pointer(skb);
 230        struct tc_mqprio_qopt opt = { 0 };
 231        struct Qdisc *qdisc;
 232        unsigned int i;
 233
 234        sch->q.qlen = 0;
 235        memset(&sch->bstats, 0, sizeof(sch->bstats));
 236        memset(&sch->qstats, 0, sizeof(sch->qstats));
 237
 238        for (i = 0; i < dev->num_tx_queues; i++) {
 239                qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
 240                spin_lock_bh(qdisc_lock(qdisc));
 241                sch->q.qlen             += qdisc->q.qlen;
 242                sch->bstats.bytes       += qdisc->bstats.bytes;
 243                sch->bstats.packets     += qdisc->bstats.packets;
 244                sch->qstats.backlog     += qdisc->qstats.backlog;
 245                sch->qstats.drops       += qdisc->qstats.drops;
 246                sch->qstats.requeues    += qdisc->qstats.requeues;
 247                sch->qstats.overlimits  += qdisc->qstats.overlimits;
 248                spin_unlock_bh(qdisc_lock(qdisc));
 249        }
 250
 251        opt.num_tc = netdev_get_num_tc(dev);
 252        memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
 253        opt.hw = priv->hw_owned;
 254
 255        for (i = 0; i < netdev_get_num_tc(dev); i++) {
 256                opt.count[i] = dev->tc_to_txq[i].count;
 257                opt.offset[i] = dev->tc_to_txq[i].offset;
 258        }
 259
 260        if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 261                goto nla_put_failure;
 262
 263        return skb->len;
 264nla_put_failure:
 265        nlmsg_trim(skb, b);
 266        return -1;
 267}
 268
 269static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
 270{
 271        struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 272
 273        if (!dev_queue)
 274                return NULL;
 275
 276        return dev_queue->qdisc_sleeping;
 277}
 278
 279static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
 280{
 281        struct net_device *dev = qdisc_dev(sch);
 282        unsigned int ntx = TC_H_MIN(classid);
 283
 284        if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
 285                return 0;
 286        return ntx;
 287}
 288
 289static void mqprio_put(struct Qdisc *sch, unsigned long cl)
 290{
 291}
 292
 293static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
 294                         struct sk_buff *skb, struct tcmsg *tcm)
 295{
 296        struct net_device *dev = qdisc_dev(sch);
 297
 298        if (cl <= netdev_get_num_tc(dev)) {
 299                tcm->tcm_parent = TC_H_ROOT;
 300                tcm->tcm_info = 0;
 301        } else {
 302                int i;
 303                struct netdev_queue *dev_queue;
 304
 305                dev_queue = mqprio_queue_get(sch, cl);
 306                tcm->tcm_parent = 0;
 307                for (i = 0; i < netdev_get_num_tc(dev); i++) {
 308                        struct netdev_tc_txq tc = dev->tc_to_txq[i];
 309                        int q_idx = cl - netdev_get_num_tc(dev);
 310
 311                        if (q_idx > tc.offset &&
 312                            q_idx <= tc.offset + tc.count) {
 313                                tcm->tcm_parent =
 314                                        TC_H_MAKE(TC_H_MAJ(sch->handle),
 315                                                  TC_H_MIN(i + 1));
 316                                break;
 317                        }
 318                }
 319                tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
 320        }
 321        tcm->tcm_handle |= TC_H_MIN(cl);
 322        return 0;
 323}
 324
 325static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 326                                   struct gnet_dump *d)
 327        __releases(d->lock)
 328        __acquires(d->lock)
 329{
 330        struct net_device *dev = qdisc_dev(sch);
 331
 332        if (cl <= netdev_get_num_tc(dev)) {
 333                int i;
 334                __u32 qlen = 0;
 335                struct Qdisc *qdisc;
 336                struct gnet_stats_queue qstats = {0};
 337                struct gnet_stats_basic_packed bstats = {0};
 338                struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
 339
 340                /* Drop lock here it will be reclaimed before touching
 341                 * statistics this is required because the d->lock we
 342                 * hold here is the look on dev_queue->qdisc_sleeping
 343                 * also acquired below.
 344                 */
 345                spin_unlock_bh(d->lock);
 346
 347                for (i = tc.offset; i < tc.offset + tc.count; i++) {
 348                        struct netdev_queue *q = netdev_get_tx_queue(dev, i);
 349
 350                        qdisc = rtnl_dereference(q->qdisc);
 351                        spin_lock_bh(qdisc_lock(qdisc));
 352                        qlen              += qdisc->q.qlen;
 353                        bstats.bytes      += qdisc->bstats.bytes;
 354                        bstats.packets    += qdisc->bstats.packets;
 355                        qstats.backlog    += qdisc->qstats.backlog;
 356                        qstats.drops      += qdisc->qstats.drops;
 357                        qstats.requeues   += qdisc->qstats.requeues;
 358                        qstats.overlimits += qdisc->qstats.overlimits;
 359                        spin_unlock_bh(qdisc_lock(qdisc));
 360                }
 361                /* Reclaim root sleeping lock before completing stats */
 362                spin_lock_bh(d->lock);
 363                if (gnet_stats_copy_basic(d, NULL, &bstats) < 0 ||
 364                    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
 365                        return -1;
 366        } else {
 367                struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 368
 369                sch = dev_queue->qdisc_sleeping;
 370                if (gnet_stats_copy_basic(d, NULL, &sch->bstats) < 0 ||
 371                    gnet_stats_copy_queue(d, NULL,
 372                                          &sch->qstats, sch->q.qlen) < 0)
 373                        return -1;
 374        }
 375        return 0;
 376}
 377
 378static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 379{
 380        struct net_device *dev = qdisc_dev(sch);
 381        unsigned long ntx;
 382
 383        if (arg->stop)
 384                return;
 385
 386        /* Walk hierarchy with a virtual class per tc */
 387        arg->count = arg->skip;
 388        for (ntx = arg->skip;
 389             ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
 390             ntx++) {
 391                if (arg->fn(sch, ntx + 1, arg) < 0) {
 392                        arg->stop = 1;
 393                        break;
 394                }
 395                arg->count++;
 396        }
 397}
 398
 399static const struct Qdisc_class_ops mqprio_class_ops = {
 400        .graft          = mqprio_graft,
 401        .leaf           = mqprio_leaf,
 402        .get            = mqprio_get,
 403        .put            = mqprio_put,
 404        .walk           = mqprio_walk,
 405        .dump           = mqprio_dump_class,
 406        .dump_stats     = mqprio_dump_class_stats,
 407};
 408
 409static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
 410        .cl_ops         = &mqprio_class_ops,
 411        .id             = "mqprio",
 412        .priv_size      = sizeof(struct mqprio_sched),
 413        .init           = mqprio_init,
 414        .destroy        = mqprio_destroy,
 415        .attach         = mqprio_attach,
 416        .dump           = mqprio_dump,
 417        .owner          = THIS_MODULE,
 418};
 419
 420static int __init mqprio_module_init(void)
 421{
 422        return register_qdisc(&mqprio_qdisc_ops);
 423}
 424
 425static void __exit mqprio_module_exit(void)
 426{
 427        unregister_qdisc(&mqprio_qdisc_ops);
 428}
 429
 430module_init(mqprio_module_init);
 431module_exit(mqprio_module_exit);
 432
 433MODULE_LICENSE("GPL");
 434