1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/string.h>
22#include <linux/errno.h>
23#include <linux/skbuff.h>
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
29#include <linux/hrtimer.h>
30#include <linux/slab.h>
31#include <linux/hashtable.h>
32
33#include <net/net_namespace.h>
34#include <net/sock.h>
35#include <net/netlink.h>
36#include <net/pkt_sched.h>
37#include <net/pkt_cls.h>
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118static DEFINE_RWLOCK(qdisc_mod_lock);
119
120
121
122
123
124
125
126
127
128static struct Qdisc_ops *qdisc_base;
129
130
131
132int register_qdisc(struct Qdisc_ops *qops)
133{
134 struct Qdisc_ops *q, **qp;
135 int rc = -EEXIST;
136
137 write_lock(&qdisc_mod_lock);
138 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
139 if (!strcmp(qops->id, q->id))
140 goto out;
141
142 if (qops->enqueue == NULL)
143 qops->enqueue = noop_qdisc_ops.enqueue;
144 if (qops->peek == NULL) {
145 if (qops->dequeue == NULL)
146 qops->peek = noop_qdisc_ops.peek;
147 else
148 goto out_einval;
149 }
150 if (qops->dequeue == NULL)
151 qops->dequeue = noop_qdisc_ops.dequeue;
152
153 if (qops->cl_ops) {
154 const struct Qdisc_class_ops *cops = qops->cl_ops;
155
156 if (!(cops->find && cops->walk && cops->leaf))
157 goto out_einval;
158
159 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
160 goto out_einval;
161 }
162
163 qops->next = NULL;
164 *qp = qops;
165 rc = 0;
166out:
167 write_unlock(&qdisc_mod_lock);
168 return rc;
169
170out_einval:
171 rc = -EINVAL;
172 goto out;
173}
174EXPORT_SYMBOL(register_qdisc);
175
176int unregister_qdisc(struct Qdisc_ops *qops)
177{
178 struct Qdisc_ops *q, **qp;
179 int err = -ENOENT;
180
181 write_lock(&qdisc_mod_lock);
182 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
183 if (q == qops)
184 break;
185 if (q) {
186 *qp = q->next;
187 q->next = NULL;
188 err = 0;
189 }
190 write_unlock(&qdisc_mod_lock);
191 return err;
192}
193EXPORT_SYMBOL(unregister_qdisc);
194
195
196void qdisc_get_default(char *name, size_t len)
197{
198 read_lock(&qdisc_mod_lock);
199 strlcpy(name, default_qdisc_ops->id, len);
200 read_unlock(&qdisc_mod_lock);
201}
202
203static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204{
205 struct Qdisc_ops *q = NULL;
206
207 for (q = qdisc_base; q; q = q->next) {
208 if (!strcmp(name, q->id)) {
209 if (!try_module_get(q->owner))
210 q = NULL;
211 break;
212 }
213 }
214
215 return q;
216}
217
218
219int qdisc_set_default(const char *name)
220{
221 const struct Qdisc_ops *ops;
222
223 if (!capable(CAP_NET_ADMIN))
224 return -EPERM;
225
226 write_lock(&qdisc_mod_lock);
227 ops = qdisc_lookup_default(name);
228 if (!ops) {
229
230 write_unlock(&qdisc_mod_lock);
231 request_module("sch_%s", name);
232 write_lock(&qdisc_mod_lock);
233
234 ops = qdisc_lookup_default(name);
235 }
236
237 if (ops) {
238
239 module_put(default_qdisc_ops->owner);
240 default_qdisc_ops = ops;
241 }
242 write_unlock(&qdisc_mod_lock);
243
244 return ops ? 0 : -ENOENT;
245}
246
#ifdef CONFIG_NET_SCH_DEFAULT
/*
 * Install the qdisc type named by CONFIG_DEFAULT_NET_SCH as the default.
 * Registered as a late initcall; the result of qdisc_set_default() is
 * passed straight through.
 */
static int __init sch_default_qdisc(void)
{
	int ret = qdisc_set_default(CONFIG_DEFAULT_NET_SCH);

	return ret;
}
late_initcall(sch_default_qdisc);
#endif
255
256
257
258
259
260
261static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
262{
263 struct Qdisc *q;
264
265 if (!qdisc_dev(root))
266 return (root->handle == handle ? root : NULL);
267
268 if (!(root->flags & TCQ_F_BUILTIN) &&
269 root->handle == handle)
270 return root;
271
272 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
273 if (q->handle == handle)
274 return q;
275 }
276 return NULL;
277}
278
279void qdisc_hash_add(struct Qdisc *q, bool invisible)
280{
281 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
282 ASSERT_RTNL();
283 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
284 if (invisible)
285 q->flags |= TCQ_F_INVISIBLE;
286 }
287}
288EXPORT_SYMBOL(qdisc_hash_add);
289
290void qdisc_hash_del(struct Qdisc *q)
291{
292 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
293 ASSERT_RTNL();
294 hash_del_rcu(&q->hash);
295 }
296}
297EXPORT_SYMBOL(qdisc_hash_del);
298
299struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
300{
301 struct Qdisc *q;
302
303 if (!handle)
304 return NULL;
305 q = qdisc_match_from_root(dev->qdisc, handle);
306 if (q)
307 goto out;
308
309 if (dev_ingress_queue(dev))
310 q = qdisc_match_from_root(
311 dev_ingress_queue(dev)->qdisc_sleeping,
312 handle);
313out:
314 return q;
315}
316
317struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
318{
319 struct netdev_queue *nq;
320 struct Qdisc *q;
321
322 if (!handle)
323 return NULL;
324 q = qdisc_match_from_root(dev->qdisc, handle);
325 if (q)
326 goto out;
327
328 nq = dev_ingress_queue_rcu(dev);
329 if (nq)
330 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
331out:
332 return q;
333}
334
335static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
336{
337 unsigned long cl;
338 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
339
340 if (cops == NULL)
341 return NULL;
342 cl = cops->find(p, classid);
343
344 if (cl == 0)
345 return NULL;
346 return cops->leaf(p, cl);
347}
348
349
350
351static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
352{
353 struct Qdisc_ops *q = NULL;
354
355 if (kind) {
356 read_lock(&qdisc_mod_lock);
357 for (q = qdisc_base; q; q = q->next) {
358 if (nla_strcmp(kind, q->id) == 0) {
359 if (!try_module_get(q->owner))
360 q = NULL;
361 break;
362 }
363 }
364 read_unlock(&qdisc_mod_lock);
365 }
366 return q;
367}
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
387{
388 int low = roundup(r->mpu, 48);
389 int high = roundup(low+1, 48);
390 int cell_low = low >> r->cell_log;
391 int cell_high = (high >> r->cell_log) - 1;
392
393
394 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
395 pr_debug("TC linklayer: Giving up ATM detection\n");
396 return TC_LINKLAYER_ETHERNET;
397 }
398
399 if ((cell_high > cell_low) && (cell_high < 256)
400 && (rtab[cell_low] == rtab[cell_high])) {
401 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
402 cell_low, cell_high, rtab[cell_high]);
403 return TC_LINKLAYER_ATM;
404 }
405 return TC_LINKLAYER_ETHERNET;
406}
407
408static struct qdisc_rate_table *qdisc_rtab_list;
409
410struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
411 struct nlattr *tab,
412 struct netlink_ext_ack *extack)
413{
414 struct qdisc_rate_table *rtab;
415
416 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
417 nla_len(tab) != TC_RTAB_SIZE) {
418 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
419 return NULL;
420 }
421
422 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
423 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
424 !memcmp(&rtab->data, nla_data(tab), 1024)) {
425 rtab->refcnt++;
426 return rtab;
427 }
428 }
429
430 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
431 if (rtab) {
432 rtab->rate = *r;
433 rtab->refcnt = 1;
434 memcpy(rtab->data, nla_data(tab), 1024);
435 if (r->linklayer == TC_LINKLAYER_UNAWARE)
436 r->linklayer = __detect_linklayer(r, rtab->data);
437 rtab->next = qdisc_rtab_list;
438 qdisc_rtab_list = rtab;
439 } else {
440 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
441 }
442 return rtab;
443}
444EXPORT_SYMBOL(qdisc_get_rtab);
445
446void qdisc_put_rtab(struct qdisc_rate_table *tab)
447{
448 struct qdisc_rate_table *rtab, **rtabp;
449
450 if (!tab || --tab->refcnt)
451 return;
452
453 for (rtabp = &qdisc_rtab_list;
454 (rtab = *rtabp) != NULL;
455 rtabp = &rtab->next) {
456 if (rtab == tab) {
457 *rtabp = rtab->next;
458 kfree(rtab);
459 return;
460 }
461 }
462}
463EXPORT_SYMBOL(qdisc_put_rtab);
464
465static LIST_HEAD(qdisc_stab_list);
466
467static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
468 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
469 [TCA_STAB_DATA] = { .type = NLA_BINARY },
470};
471
472static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
473 struct netlink_ext_ack *extack)
474{
475 struct nlattr *tb[TCA_STAB_MAX + 1];
476 struct qdisc_size_table *stab;
477 struct tc_sizespec *s;
478 unsigned int tsize = 0;
479 u16 *tab = NULL;
480 int err;
481
482 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
483 if (err < 0)
484 return ERR_PTR(err);
485 if (!tb[TCA_STAB_BASE]) {
486 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
487 return ERR_PTR(-EINVAL);
488 }
489
490 s = nla_data(tb[TCA_STAB_BASE]);
491
492 if (s->tsize > 0) {
493 if (!tb[TCA_STAB_DATA]) {
494 NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
495 return ERR_PTR(-EINVAL);
496 }
497 tab = nla_data(tb[TCA_STAB_DATA]);
498 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
499 }
500
501 if (tsize != s->tsize || (!tab && tsize > 0)) {
502 NL_SET_ERR_MSG(extack, "Invalid size of size table");
503 return ERR_PTR(-EINVAL);
504 }
505
506 list_for_each_entry(stab, &qdisc_stab_list, list) {
507 if (memcmp(&stab->szopts, s, sizeof(*s)))
508 continue;
509 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
510 continue;
511 stab->refcnt++;
512 return stab;
513 }
514
515 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
516 if (!stab)
517 return ERR_PTR(-ENOMEM);
518
519 stab->refcnt = 1;
520 stab->szopts = *s;
521 if (tsize > 0)
522 memcpy(stab->data, tab, tsize * sizeof(u16));
523
524 list_add_tail(&stab->list, &qdisc_stab_list);
525
526 return stab;
527}
528
529static void stab_kfree_rcu(struct rcu_head *head)
530{
531 kfree(container_of(head, struct qdisc_size_table, rcu));
532}
533
534void qdisc_put_stab(struct qdisc_size_table *tab)
535{
536 if (!tab)
537 return;
538
539 if (--tab->refcnt == 0) {
540 list_del(&tab->list);
541 call_rcu(&tab->rcu, stab_kfree_rcu);
542 }
543}
544EXPORT_SYMBOL(qdisc_put_stab);
545
546static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
547{
548 struct nlattr *nest;
549
550 nest = nla_nest_start(skb, TCA_STAB);
551 if (nest == NULL)
552 goto nla_put_failure;
553 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
554 goto nla_put_failure;
555 nla_nest_end(skb, nest);
556
557 return skb->len;
558
559nla_put_failure:
560 return -1;
561}
562
563void __qdisc_calculate_pkt_len(struct sk_buff *skb,
564 const struct qdisc_size_table *stab)
565{
566 int pkt_len, slot;
567
568 pkt_len = skb->len + stab->szopts.overhead;
569 if (unlikely(!stab->szopts.tsize))
570 goto out;
571
572 slot = pkt_len + stab->szopts.cell_align;
573 if (unlikely(slot < 0))
574 slot = 0;
575
576 slot >>= stab->szopts.cell_log;
577 if (likely(slot < stab->szopts.tsize))
578 pkt_len = stab->data[slot];
579 else
580 pkt_len = stab->data[stab->szopts.tsize - 1] *
581 (slot / stab->szopts.tsize) +
582 stab->data[slot % stab->szopts.tsize];
583
584 pkt_len <<= stab->szopts.size_log;
585out:
586 if (unlikely(pkt_len < 1))
587 pkt_len = 1;
588 qdisc_skb_cb(skb)->pkt_len = pkt_len;
589}
590EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
591
592void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
593{
594 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
595 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
596 txt, qdisc->ops->id, qdisc->handle >> 16);
597 qdisc->flags |= TCQ_F_WARN_NONWC;
598 }
599}
600EXPORT_SYMBOL(qdisc_warn_nonwc);
601
602static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
603{
604 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
605 timer);
606
607 rcu_read_lock();
608 __netif_schedule(qdisc_root(wd->qdisc));
609 rcu_read_unlock();
610
611 return HRTIMER_NORESTART;
612}
613
614void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
615 clockid_t clockid)
616{
617 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
618 wd->timer.function = qdisc_watchdog;
619 wd->qdisc = qdisc;
620}
621EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
622
623void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
624{
625 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
626}
627EXPORT_SYMBOL(qdisc_watchdog_init);
628
629void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
630{
631 if (test_bit(__QDISC_STATE_DEACTIVATED,
632 &qdisc_root_sleeping(wd->qdisc)->state))
633 return;
634
635 if (wd->last_expires == expires)
636 return;
637
638 wd->last_expires = expires;
639 hrtimer_start(&wd->timer,
640 ns_to_ktime(expires),
641 HRTIMER_MODE_ABS_PINNED);
642}
643EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
644
645void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
646{
647 hrtimer_cancel(&wd->timer);
648}
649EXPORT_SYMBOL(qdisc_watchdog_cancel);
650
651static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
652{
653 struct hlist_head *h;
654 unsigned int i;
655
656 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
657
658 if (h != NULL) {
659 for (i = 0; i < n; i++)
660 INIT_HLIST_HEAD(&h[i]);
661 }
662 return h;
663}
664
665void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
666{
667 struct Qdisc_class_common *cl;
668 struct hlist_node *next;
669 struct hlist_head *nhash, *ohash;
670 unsigned int nsize, nmask, osize;
671 unsigned int i, h;
672
673
674 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
675 return;
676 nsize = clhash->hashsize * 2;
677 nmask = nsize - 1;
678 nhash = qdisc_class_hash_alloc(nsize);
679 if (nhash == NULL)
680 return;
681
682 ohash = clhash->hash;
683 osize = clhash->hashsize;
684
685 sch_tree_lock(sch);
686 for (i = 0; i < osize; i++) {
687 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
688 h = qdisc_class_hash(cl->classid, nmask);
689 hlist_add_head(&cl->hnode, &nhash[h]);
690 }
691 }
692 clhash->hash = nhash;
693 clhash->hashsize = nsize;
694 clhash->hashmask = nmask;
695 sch_tree_unlock(sch);
696
697 kvfree(ohash);
698}
699EXPORT_SYMBOL(qdisc_class_hash_grow);
700
701int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
702{
703 unsigned int size = 4;
704
705 clhash->hash = qdisc_class_hash_alloc(size);
706 if (!clhash->hash)
707 return -ENOMEM;
708 clhash->hashsize = size;
709 clhash->hashmask = size - 1;
710 clhash->hashelems = 0;
711 return 0;
712}
713EXPORT_SYMBOL(qdisc_class_hash_init);
714
715void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
716{
717 kvfree(clhash->hash);
718}
719EXPORT_SYMBOL(qdisc_class_hash_destroy);
720
721void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
722 struct Qdisc_class_common *cl)
723{
724 unsigned int h;
725
726 INIT_HLIST_NODE(&cl->hnode);
727 h = qdisc_class_hash(cl->classid, clhash->hashmask);
728 hlist_add_head(&cl->hnode, &clhash->hash[h]);
729 clhash->hashelems++;
730}
731EXPORT_SYMBOL(qdisc_class_hash_insert);
732
733void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
734 struct Qdisc_class_common *cl)
735{
736 hlist_del(&cl->hnode);
737 clhash->hashelems--;
738}
739EXPORT_SYMBOL(qdisc_class_hash_remove);
740
741
742
743
744static u32 qdisc_alloc_handle(struct net_device *dev)
745{
746 int i = 0x8000;
747 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
748
749 do {
750 autohandle += TC_H_MAKE(0x10000U, 0);
751 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
752 autohandle = TC_H_MAKE(0x80000000U, 0);
753 if (!qdisc_lookup(dev, autohandle))
754 return autohandle;
755 cond_resched();
756 } while (--i > 0);
757
758 return 0;
759}
760
761void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
762 unsigned int len)
763{
764 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
765 const struct Qdisc_class_ops *cops;
766 unsigned long cl;
767 u32 parentid;
768 bool notify;
769 int drops;
770
771 if (n == 0 && len == 0)
772 return;
773 drops = max_t(int, n, 0);
774 rcu_read_lock();
775 while ((parentid = sch->parent)) {
776 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
777 break;
778
779 if (sch->flags & TCQ_F_NOPARENT)
780 break;
781
782
783
784
785
786
787
788
789
790 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
791 !qdisc_is_offloaded);
792
793 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
794 if (sch == NULL) {
795 WARN_ON_ONCE(parentid != TC_H_ROOT);
796 break;
797 }
798 cops = sch->ops->cl_ops;
799 if (notify && cops->qlen_notify) {
800 cl = cops->find(sch, parentid);
801 cops->qlen_notify(sch, cl);
802 }
803 sch->q.qlen -= n;
804 sch->qstats.backlog -= len;
805 __qdisc_qstats_drop(sch, drops);
806 }
807 rcu_read_unlock();
808}
809EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
810
811int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
812 void *type_data)
813{
814 struct net_device *dev = qdisc_dev(sch);
815 int err;
816
817 sch->flags &= ~TCQ_F_OFFLOADED;
818 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
819 return 0;
820
821 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
822 if (err == -EOPNOTSUPP)
823 return 0;
824
825 if (!err)
826 sch->flags |= TCQ_F_OFFLOADED;
827
828 return err;
829}
830EXPORT_SYMBOL(qdisc_offload_dump_helper);
831
832void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
833 struct Qdisc *new, struct Qdisc *old,
834 enum tc_setup_type type, void *type_data,
835 struct netlink_ext_ack *extack)
836{
837 bool any_qdisc_is_offloaded;
838 int err;
839
840 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
841 return;
842
843 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
844
845
846 if (!err || !new || new == &noop_qdisc)
847 return;
848
849
850
851
852 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
853 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
854 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
855
856 if (any_qdisc_is_offloaded)
857 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
858}
859EXPORT_SYMBOL(qdisc_offload_graft_helper);
860
861static void qdisc_offload_graft_root(struct net_device *dev,
862 struct Qdisc *new, struct Qdisc *old,
863 struct netlink_ext_ack *extack)
864{
865 struct tc_root_qopt_offload graft_offload = {
866 .command = TC_ROOT_GRAFT,
867 .handle = new ? new->handle : 0,
868 .ingress = (new && new->flags & TCQ_F_INGRESS) ||
869 (old && old->flags & TCQ_F_INGRESS),
870 };
871
872 qdisc_offload_graft_helper(dev, NULL, new, old,
873 TC_SETUP_ROOT_QDISC, &graft_offload, extack);
874}
875
876static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
877 u32 portid, u32 seq, u16 flags, int event)
878{
879 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
880 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
881 struct tcmsg *tcm;
882 struct nlmsghdr *nlh;
883 unsigned char *b = skb_tail_pointer(skb);
884 struct gnet_dump d;
885 struct qdisc_size_table *stab;
886 u32 block_index;
887 __u32 qlen;
888
889 cond_resched();
890 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
891 if (!nlh)
892 goto out_nlmsg_trim;
893 tcm = nlmsg_data(nlh);
894 tcm->tcm_family = AF_UNSPEC;
895 tcm->tcm__pad1 = 0;
896 tcm->tcm__pad2 = 0;
897 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
898 tcm->tcm_parent = clid;
899 tcm->tcm_handle = q->handle;
900 tcm->tcm_info = refcount_read(&q->refcnt);
901 if (nla_put_string(skb, TCA_KIND, q->ops->id))
902 goto nla_put_failure;
903 if (q->ops->ingress_block_get) {
904 block_index = q->ops->ingress_block_get(q);
905 if (block_index &&
906 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
907 goto nla_put_failure;
908 }
909 if (q->ops->egress_block_get) {
910 block_index = q->ops->egress_block_get(q);
911 if (block_index &&
912 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
913 goto nla_put_failure;
914 }
915 if (q->ops->dump && q->ops->dump(q, skb) < 0)
916 goto nla_put_failure;
917 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
918 goto nla_put_failure;
919 qlen = qdisc_qlen_sum(q);
920
921 stab = rtnl_dereference(q->stab);
922 if (stab && qdisc_dump_stab(skb, stab) < 0)
923 goto nla_put_failure;
924
925 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
926 NULL, &d, TCA_PAD) < 0)
927 goto nla_put_failure;
928
929 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
930 goto nla_put_failure;
931
932 if (qdisc_is_percpu_stats(q)) {
933 cpu_bstats = q->cpu_bstats;
934 cpu_qstats = q->cpu_qstats;
935 }
936
937 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
938 &d, cpu_bstats, &q->bstats) < 0 ||
939 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
940 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
941 goto nla_put_failure;
942
943 if (gnet_stats_finish_copy(&d) < 0)
944 goto nla_put_failure;
945
946 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
947 return skb->len;
948
949out_nlmsg_trim:
950nla_put_failure:
951 nlmsg_trim(skb, b);
952 return -1;
953}
954
955static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
956{
957 if (q->flags & TCQ_F_BUILTIN)
958 return true;
959 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
960 return true;
961
962 return false;
963}
964
965static int qdisc_notify(struct net *net, struct sk_buff *oskb,
966 struct nlmsghdr *n, u32 clid,
967 struct Qdisc *old, struct Qdisc *new)
968{
969 struct sk_buff *skb;
970 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
971
972 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
973 if (!skb)
974 return -ENOBUFS;
975
976 if (old && !tc_qdisc_dump_ignore(old, false)) {
977 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
978 0, RTM_DELQDISC) < 0)
979 goto err_out;
980 }
981 if (new && !tc_qdisc_dump_ignore(new, false)) {
982 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
983 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
984 goto err_out;
985 }
986
987 if (skb->len)
988 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
989 n->nlmsg_flags & NLM_F_ECHO);
990
991err_out:
992 kfree_skb(skb);
993 return -EINVAL;
994}
995
996static void notify_and_destroy(struct net *net, struct sk_buff *skb,
997 struct nlmsghdr *n, u32 clid,
998 struct Qdisc *old, struct Qdisc *new)
999{
1000 if (new || old)
1001 qdisc_notify(net, skb, n, clid, old, new);
1002
1003 if (old)
1004 qdisc_put(old);
1005}
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1017 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1018 struct Qdisc *new, struct Qdisc *old,
1019 struct netlink_ext_ack *extack)
1020{
1021 struct Qdisc *q = old;
1022 struct net *net = dev_net(dev);
1023
1024 if (parent == NULL) {
1025 unsigned int i, num_q, ingress;
1026
1027 ingress = 0;
1028 num_q = dev->num_tx_queues;
1029 if ((q && q->flags & TCQ_F_INGRESS) ||
1030 (new && new->flags & TCQ_F_INGRESS)) {
1031 num_q = 1;
1032 ingress = 1;
1033 if (!dev_ingress_queue(dev)) {
1034 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1035 return -ENOENT;
1036 }
1037 }
1038
1039 if (dev->flags & IFF_UP)
1040 dev_deactivate(dev);
1041
1042 qdisc_offload_graft_root(dev, new, old, extack);
1043
1044 if (new && new->ops->attach)
1045 goto skip;
1046
1047 for (i = 0; i < num_q; i++) {
1048 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
1049
1050 if (!ingress)
1051 dev_queue = netdev_get_tx_queue(dev, i);
1052
1053 old = dev_graft_qdisc(dev_queue, new);
1054 if (new && i > 0)
1055 qdisc_refcount_inc(new);
1056
1057 if (!ingress)
1058 qdisc_put(old);
1059 }
1060
1061skip:
1062 if (!ingress) {
1063 notify_and_destroy(net, skb, n, classid,
1064 dev->qdisc, new);
1065 if (new && !new->ops->attach)
1066 qdisc_refcount_inc(new);
1067 dev->qdisc = new ? : &noop_qdisc;
1068
1069 if (new && new->ops->attach)
1070 new->ops->attach(new);
1071 } else {
1072 notify_and_destroy(net, skb, n, classid, old, new);
1073 }
1074
1075 if (dev->flags & IFF_UP)
1076 dev_activate(dev);
1077 } else {
1078 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1079 unsigned long cl;
1080 int err;
1081
1082
1083 if (new && (new->flags & TCQ_F_NOLOCK) &&
1084 parent && !(parent->flags & TCQ_F_NOLOCK))
1085 new->flags &= ~TCQ_F_NOLOCK;
1086
1087 if (!cops || !cops->graft)
1088 return -EOPNOTSUPP;
1089
1090 cl = cops->find(parent, classid);
1091 if (!cl) {
1092 NL_SET_ERR_MSG(extack, "Specified class not found");
1093 return -ENOENT;
1094 }
1095
1096 err = cops->graft(parent, cl, new, &old, extack);
1097 if (err)
1098 return err;
1099 notify_and_destroy(net, skb, n, classid, old, new);
1100 }
1101 return 0;
1102}
1103
1104static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1105 struct netlink_ext_ack *extack)
1106{
1107 u32 block_index;
1108
1109 if (tca[TCA_INGRESS_BLOCK]) {
1110 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1111
1112 if (!block_index) {
1113 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1114 return -EINVAL;
1115 }
1116 if (!sch->ops->ingress_block_set) {
1117 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1118 return -EOPNOTSUPP;
1119 }
1120 sch->ops->ingress_block_set(sch, block_index);
1121 }
1122 if (tca[TCA_EGRESS_BLOCK]) {
1123 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1124
1125 if (!block_index) {
1126 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1127 return -EINVAL;
1128 }
1129 if (!sch->ops->egress_block_set) {
1130 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1131 return -EOPNOTSUPP;
1132 }
1133 sch->ops->egress_block_set(sch, block_index);
1134 }
1135 return 0;
1136}
1137
1138
1139
1140
1141
1142
1143
1144static struct Qdisc *qdisc_create(struct net_device *dev,
1145 struct netdev_queue *dev_queue,
1146 struct Qdisc *p, u32 parent, u32 handle,
1147 struct nlattr **tca, int *errp,
1148 struct netlink_ext_ack *extack)
1149{
1150 int err;
1151 struct nlattr *kind = tca[TCA_KIND];
1152 struct Qdisc *sch;
1153 struct Qdisc_ops *ops;
1154 struct qdisc_size_table *stab;
1155
1156 ops = qdisc_lookup_ops(kind);
1157#ifdef CONFIG_MODULES
1158 if (ops == NULL && kind != NULL) {
1159 char name[IFNAMSIZ];
1160 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1161
1162
1163
1164
1165
1166
1167
1168
1169 rtnl_unlock();
1170 request_module("sch_%s", name);
1171 rtnl_lock();
1172 ops = qdisc_lookup_ops(kind);
1173 if (ops != NULL) {
1174
1175
1176
1177 module_put(ops->owner);
1178 err = -EAGAIN;
1179 goto err_out;
1180 }
1181 }
1182 }
1183#endif
1184
1185 err = -ENOENT;
1186 if (!ops) {
1187 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1188 goto err_out;
1189 }
1190
1191 sch = qdisc_alloc(dev_queue, ops, extack);
1192 if (IS_ERR(sch)) {
1193 err = PTR_ERR(sch);
1194 goto err_out2;
1195 }
1196
1197 sch->parent = parent;
1198
1199 if (handle == TC_H_INGRESS) {
1200 sch->flags |= TCQ_F_INGRESS;
1201 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1202 } else {
1203 if (handle == 0) {
1204 handle = qdisc_alloc_handle(dev);
1205 err = -ENOMEM;
1206 if (handle == 0)
1207 goto err_out3;
1208 }
1209 if (!netif_is_multiqueue(dev))
1210 sch->flags |= TCQ_F_ONETXQUEUE;
1211 }
1212
1213 sch->handle = handle;
1214
1215
1216
1217
1218
1219
1220
1221 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1222 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1223 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1224 }
1225
1226 err = qdisc_block_indexes_set(sch, tca, extack);
1227 if (err)
1228 goto err_out3;
1229
1230 if (ops->init) {
1231 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1232 if (err != 0)
1233 goto err_out5;
1234 }
1235
1236 if (tca[TCA_STAB]) {
1237 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1238 if (IS_ERR(stab)) {
1239 err = PTR_ERR(stab);
1240 goto err_out4;
1241 }
1242 rcu_assign_pointer(sch->stab, stab);
1243 }
1244 if (tca[TCA_RATE]) {
1245 seqcount_t *running;
1246
1247 err = -EOPNOTSUPP;
1248 if (sch->flags & TCQ_F_MQROOT) {
1249 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1250 goto err_out4;
1251 }
1252
1253 if (sch->parent != TC_H_ROOT &&
1254 !(sch->flags & TCQ_F_INGRESS) &&
1255 (!p || !(p->flags & TCQ_F_MQROOT)))
1256 running = qdisc_root_sleeping_running(sch);
1257 else
1258 running = &sch->running;
1259
1260 err = gen_new_estimator(&sch->bstats,
1261 sch->cpu_bstats,
1262 &sch->rate_est,
1263 NULL,
1264 running,
1265 tca[TCA_RATE]);
1266 if (err) {
1267 NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1268 goto err_out4;
1269 }
1270 }
1271
1272 qdisc_hash_add(sch, false);
1273
1274 return sch;
1275
1276err_out5:
1277
1278 if (ops->destroy)
1279 ops->destroy(sch);
1280err_out3:
1281 dev_put(dev);
1282 qdisc_free(sch);
1283err_out2:
1284 module_put(ops->owner);
1285err_out:
1286 *errp = err;
1287 return NULL;
1288
1289err_out4:
1290
1291
1292
1293
1294 qdisc_put_stab(rtnl_dereference(sch->stab));
1295 if (ops->destroy)
1296 ops->destroy(sch);
1297 goto err_out3;
1298}
1299
1300static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1301 struct netlink_ext_ack *extack)
1302{
1303 struct qdisc_size_table *ostab, *stab = NULL;
1304 int err = 0;
1305
1306 if (tca[TCA_OPTIONS]) {
1307 if (!sch->ops->change) {
1308 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1309 return -EINVAL;
1310 }
1311 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1312 NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1313 return -EOPNOTSUPP;
1314 }
1315 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1316 if (err)
1317 return err;
1318 }
1319
1320 if (tca[TCA_STAB]) {
1321 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1322 if (IS_ERR(stab))
1323 return PTR_ERR(stab);
1324 }
1325
1326 ostab = rtnl_dereference(sch->stab);
1327 rcu_assign_pointer(sch->stab, stab);
1328 qdisc_put_stab(ostab);
1329
1330 if (tca[TCA_RATE]) {
1331
1332
1333 if (sch->flags & TCQ_F_MQROOT)
1334 goto out;
1335 gen_replace_estimator(&sch->bstats,
1336 sch->cpu_bstats,
1337 &sch->rate_est,
1338 NULL,
1339 qdisc_root_sleeping_running(sch),
1340 tca[TCA_RATE]);
1341 }
1342out:
1343 return 0;
1344}
1345
1346struct check_loop_arg {
1347 struct qdisc_walker w;
1348 struct Qdisc *p;
1349 int depth;
1350};
1351
1352static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1353 struct qdisc_walker *w);
1354
1355static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1356{
1357 struct check_loop_arg arg;
1358
1359 if (q->ops->cl_ops == NULL)
1360 return 0;
1361
1362 arg.w.stop = arg.w.skip = arg.w.count = 0;
1363 arg.w.fn = check_loop_fn;
1364 arg.depth = depth;
1365 arg.p = p;
1366 q->ops->cl_ops->walk(q, &arg.w);
1367 return arg.w.stop ? -ELOOP : 0;
1368}
1369
1370static int
1371check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1372{
1373 struct Qdisc *leaf;
1374 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1375 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1376
1377 leaf = cops->leaf(q, cl);
1378 if (leaf) {
1379 if (leaf == arg->p || arg->depth > 7)
1380 return -ELOOP;
1381 return check_loop(leaf, arg->p, arg->depth + 1);
1382 }
1383 return 0;
1384}
1385
1386const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1387 [TCA_KIND] = { .type = NLA_STRING },
1388 [TCA_RATE] = { .type = NLA_BINARY,
1389 .len = sizeof(struct tc_estimator) },
1390 [TCA_STAB] = { .type = NLA_NESTED },
1391 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG },
1392 [TCA_CHAIN] = { .type = NLA_U32 },
1393 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 },
1394 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
1395};
1396
1397
1398
1399
1400
1401static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1402 struct netlink_ext_ack *extack)
1403{
1404 struct net *net = sock_net(skb->sk);
1405 struct tcmsg *tcm = nlmsg_data(n);
1406 struct nlattr *tca[TCA_MAX + 1];
1407 struct net_device *dev;
1408 u32 clid;
1409 struct Qdisc *q = NULL;
1410 struct Qdisc *p = NULL;
1411 int err;
1412
1413 if ((n->nlmsg_type != RTM_GETQDISC) &&
1414 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1415 return -EPERM;
1416
1417 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1418 extack);
1419 if (err < 0)
1420 return err;
1421
1422 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1423 if (!dev)
1424 return -ENODEV;
1425
1426 clid = tcm->tcm_parent;
1427 if (clid) {
1428 if (clid != TC_H_ROOT) {
1429 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1430 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1431 if (!p) {
1432 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1433 return -ENOENT;
1434 }
1435 q = qdisc_leaf(p, clid);
1436 } else if (dev_ingress_queue(dev)) {
1437 q = dev_ingress_queue(dev)->qdisc_sleeping;
1438 }
1439 } else {
1440 q = dev->qdisc;
1441 }
1442 if (!q) {
1443 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1444 return -ENOENT;
1445 }
1446
1447 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1448 NL_SET_ERR_MSG(extack, "Invalid handle");
1449 return -EINVAL;
1450 }
1451 } else {
1452 q = qdisc_lookup(dev, tcm->tcm_handle);
1453 if (!q) {
1454 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1455 return -ENOENT;
1456 }
1457 }
1458
1459 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1460 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1461 return -EINVAL;
1462 }
1463
1464 if (n->nlmsg_type == RTM_DELQDISC) {
1465 if (!clid) {
1466 NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1467 return -EINVAL;
1468 }
1469 if (q->handle == 0) {
1470 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1471 return -ENOENT;
1472 }
1473 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1474 if (err != 0)
1475 return err;
1476 } else {
1477 qdisc_notify(net, skb, n, clid, NULL, q);
1478 }
1479 return 0;
1480}
1481
1482
1483
1484
1485
1486static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1487 struct netlink_ext_ack *extack)
1488{
1489 struct net *net = sock_net(skb->sk);
1490 struct tcmsg *tcm;
1491 struct nlattr *tca[TCA_MAX + 1];
1492 struct net_device *dev;
1493 u32 clid;
1494 struct Qdisc *q, *p;
1495 int err;
1496
1497 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1498 return -EPERM;
1499
1500replay:
1501
1502 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1503 extack);
1504 if (err < 0)
1505 return err;
1506
1507 tcm = nlmsg_data(n);
1508 clid = tcm->tcm_parent;
1509 q = p = NULL;
1510
1511 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1512 if (!dev)
1513 return -ENODEV;
1514
1515
1516 if (clid) {
1517 if (clid != TC_H_ROOT) {
1518 if (clid != TC_H_INGRESS) {
1519 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1520 if (!p) {
1521 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1522 return -ENOENT;
1523 }
1524 q = qdisc_leaf(p, clid);
1525 } else if (dev_ingress_queue_create(dev)) {
1526 q = dev_ingress_queue(dev)->qdisc_sleeping;
1527 }
1528 } else {
1529 q = dev->qdisc;
1530 }
1531
1532
1533 if (q && q->handle == 0)
1534 q = NULL;
1535
1536 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1537 if (tcm->tcm_handle) {
1538 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1539 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1540 return -EEXIST;
1541 }
1542 if (TC_H_MIN(tcm->tcm_handle)) {
1543 NL_SET_ERR_MSG(extack, "Invalid minor handle");
1544 return -EINVAL;
1545 }
1546 q = qdisc_lookup(dev, tcm->tcm_handle);
1547 if (!q)
1548 goto create_n_graft;
1549 if (n->nlmsg_flags & NLM_F_EXCL) {
1550 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1551 return -EEXIST;
1552 }
1553 if (tca[TCA_KIND] &&
1554 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1555 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1556 return -EINVAL;
1557 }
1558 if (q == p ||
1559 (p && check_loop(q, p, 0))) {
1560 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1561 return -ELOOP;
1562 }
1563 qdisc_refcount_inc(q);
1564 goto graft;
1565 } else {
1566 if (!q)
1567 goto create_n_graft;
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1589 (n->nlmsg_flags & NLM_F_REPLACE) &&
1590 ((n->nlmsg_flags & NLM_F_EXCL) ||
1591 (tca[TCA_KIND] &&
1592 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1593 goto create_n_graft;
1594 }
1595 }
1596 } else {
1597 if (!tcm->tcm_handle) {
1598 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1599 return -EINVAL;
1600 }
1601 q = qdisc_lookup(dev, tcm->tcm_handle);
1602 }
1603
1604
1605 if (!q) {
1606 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1607 return -ENOENT;
1608 }
1609 if (n->nlmsg_flags & NLM_F_EXCL) {
1610 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1611 return -EEXIST;
1612 }
1613 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1614 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1615 return -EINVAL;
1616 }
1617 err = qdisc_change(q, tca, extack);
1618 if (err == 0)
1619 qdisc_notify(net, skb, n, clid, NULL, q);
1620 return err;
1621
1622create_n_graft:
1623 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1624 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1625 return -ENOENT;
1626 }
1627 if (clid == TC_H_INGRESS) {
1628 if (dev_ingress_queue(dev)) {
1629 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1630 tcm->tcm_parent, tcm->tcm_parent,
1631 tca, &err, extack);
1632 } else {
1633 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1634 err = -ENOENT;
1635 }
1636 } else {
1637 struct netdev_queue *dev_queue;
1638
1639 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1640 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1641 else if (p)
1642 dev_queue = p->dev_queue;
1643 else
1644 dev_queue = netdev_get_tx_queue(dev, 0);
1645
1646 q = qdisc_create(dev, dev_queue, p,
1647 tcm->tcm_parent, tcm->tcm_handle,
1648 tca, &err, extack);
1649 }
1650 if (q == NULL) {
1651 if (err == -EAGAIN)
1652 goto replay;
1653 return err;
1654 }
1655
1656graft:
1657 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1658 if (err) {
1659 if (q)
1660 qdisc_put(q);
1661 return err;
1662 }
1663
1664 return 0;
1665}
1666
1667static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1668 struct netlink_callback *cb,
1669 int *q_idx_p, int s_q_idx, bool recur,
1670 bool dump_invisible)
1671{
1672 int ret = 0, q_idx = *q_idx_p;
1673 struct Qdisc *q;
1674 int b;
1675
1676 if (!root)
1677 return 0;
1678
1679 q = root;
1680 if (q_idx < s_q_idx) {
1681 q_idx++;
1682 } else {
1683 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1684 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1685 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1686 RTM_NEWQDISC) <= 0)
1687 goto done;
1688 q_idx++;
1689 }
1690
1691
1692
1693
1694
1695
1696
1697 if (!qdisc_dev(root) || !recur)
1698 goto out;
1699
1700 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1701 if (q_idx < s_q_idx) {
1702 q_idx++;
1703 continue;
1704 }
1705 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1706 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1707 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1708 RTM_NEWQDISC) <= 0)
1709 goto done;
1710 q_idx++;
1711 }
1712
1713out:
1714 *q_idx_p = q_idx;
1715 return ret;
1716done:
1717 ret = -1;
1718 goto out;
1719}
1720
1721static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1722{
1723 struct net *net = sock_net(skb->sk);
1724 int idx, q_idx;
1725 int s_idx, s_q_idx;
1726 struct net_device *dev;
1727 const struct nlmsghdr *nlh = cb->nlh;
1728 struct nlattr *tca[TCA_MAX + 1];
1729 int err;
1730
1731 s_idx = cb->args[0];
1732 s_q_idx = q_idx = cb->args[1];
1733
1734 idx = 0;
1735 ASSERT_RTNL();
1736
1737 err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1738 rtm_tca_policy, cb->extack);
1739 if (err < 0)
1740 return err;
1741
1742 for_each_netdev(net, dev) {
1743 struct netdev_queue *dev_queue;
1744
1745 if (idx < s_idx)
1746 goto cont;
1747 if (idx > s_idx)
1748 s_q_idx = 0;
1749 q_idx = 0;
1750
1751 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1752 true, tca[TCA_DUMP_INVISIBLE]) < 0)
1753 goto done;
1754
1755 dev_queue = dev_ingress_queue(dev);
1756 if (dev_queue &&
1757 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1758 &q_idx, s_q_idx, false,
1759 tca[TCA_DUMP_INVISIBLE]) < 0)
1760 goto done;
1761
1762cont:
1763 idx++;
1764 }
1765
1766done:
1767 cb->args[0] = idx;
1768 cb->args[1] = q_idx;
1769
1770 return skb->len;
1771}
1772
1773
1774
1775
1776
1777
1778
1779static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1780 unsigned long cl,
1781 u32 portid, u32 seq, u16 flags, int event)
1782{
1783 struct tcmsg *tcm;
1784 struct nlmsghdr *nlh;
1785 unsigned char *b = skb_tail_pointer(skb);
1786 struct gnet_dump d;
1787 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1788
1789 cond_resched();
1790 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1791 if (!nlh)
1792 goto out_nlmsg_trim;
1793 tcm = nlmsg_data(nlh);
1794 tcm->tcm_family = AF_UNSPEC;
1795 tcm->tcm__pad1 = 0;
1796 tcm->tcm__pad2 = 0;
1797 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1798 tcm->tcm_parent = q->handle;
1799 tcm->tcm_handle = q->handle;
1800 tcm->tcm_info = 0;
1801 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1802 goto nla_put_failure;
1803 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1804 goto nla_put_failure;
1805
1806 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1807 NULL, &d, TCA_PAD) < 0)
1808 goto nla_put_failure;
1809
1810 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1811 goto nla_put_failure;
1812
1813 if (gnet_stats_finish_copy(&d) < 0)
1814 goto nla_put_failure;
1815
1816 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1817 return skb->len;
1818
1819out_nlmsg_trim:
1820nla_put_failure:
1821 nlmsg_trim(skb, b);
1822 return -1;
1823}
1824
1825static int tclass_notify(struct net *net, struct sk_buff *oskb,
1826 struct nlmsghdr *n, struct Qdisc *q,
1827 unsigned long cl, int event)
1828{
1829 struct sk_buff *skb;
1830 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1831
1832 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1833 if (!skb)
1834 return -ENOBUFS;
1835
1836 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1837 kfree_skb(skb);
1838 return -EINVAL;
1839 }
1840
1841 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1842 n->nlmsg_flags & NLM_F_ECHO);
1843}
1844
1845static int tclass_del_notify(struct net *net,
1846 const struct Qdisc_class_ops *cops,
1847 struct sk_buff *oskb, struct nlmsghdr *n,
1848 struct Qdisc *q, unsigned long cl)
1849{
1850 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1851 struct sk_buff *skb;
1852 int err = 0;
1853
1854 if (!cops->delete)
1855 return -EOPNOTSUPP;
1856
1857 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1858 if (!skb)
1859 return -ENOBUFS;
1860
1861 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1862 RTM_DELTCLASS) < 0) {
1863 kfree_skb(skb);
1864 return -EINVAL;
1865 }
1866
1867 err = cops->delete(q, cl);
1868 if (err) {
1869 kfree_skb(skb);
1870 return err;
1871 }
1872
1873 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1874 n->nlmsg_flags & NLM_F_ECHO);
1875}
1876
1877#ifdef CONFIG_NET_CLS
1878
1879struct tcf_bind_args {
1880 struct tcf_walker w;
1881 u32 classid;
1882 unsigned long cl;
1883};
1884
1885static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1886{
1887 struct tcf_bind_args *a = (void *)arg;
1888
1889 if (tp->ops->bind_class) {
1890 struct Qdisc *q = tcf_block_q(tp->chain->block);
1891
1892 sch_tree_lock(q);
1893 tp->ops->bind_class(n, a->classid, a->cl);
1894 sch_tree_unlock(q);
1895 }
1896 return 0;
1897}
1898
1899static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1900 unsigned long new_cl)
1901{
1902 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1903 struct tcf_block *block;
1904 struct tcf_chain *chain;
1905 unsigned long cl;
1906
1907 cl = cops->find(q, portid);
1908 if (!cl)
1909 return;
1910 block = cops->tcf_block(q, cl, NULL);
1911 if (!block)
1912 return;
1913 list_for_each_entry(chain, &block->chain_list, list) {
1914 struct tcf_proto *tp;
1915
1916 for (tp = rtnl_dereference(chain->filter_chain);
1917 tp; tp = rtnl_dereference(tp->next)) {
1918 struct tcf_bind_args arg = {};
1919
1920 arg.w.fn = tcf_node_bind;
1921 arg.classid = clid;
1922 arg.cl = new_cl;
1923 tp->ops->walk(tp, &arg.w);
1924 }
1925 }
1926}
1927
1928#else
1929
1930static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1931 unsigned long new_cl)
1932{
1933}
1934
1935#endif
1936
1937static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1938 struct netlink_ext_ack *extack)
1939{
1940 struct net *net = sock_net(skb->sk);
1941 struct tcmsg *tcm = nlmsg_data(n);
1942 struct nlattr *tca[TCA_MAX + 1];
1943 struct net_device *dev;
1944 struct Qdisc *q = NULL;
1945 const struct Qdisc_class_ops *cops;
1946 unsigned long cl = 0;
1947 unsigned long new_cl;
1948 u32 portid;
1949 u32 clid;
1950 u32 qid;
1951 int err;
1952
1953 if ((n->nlmsg_type != RTM_GETTCLASS) &&
1954 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1955 return -EPERM;
1956
1957 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
1958 extack);
1959 if (err < 0)
1960 return err;
1961
1962 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1963 if (!dev)
1964 return -ENODEV;
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981 portid = tcm->tcm_parent;
1982 clid = tcm->tcm_handle;
1983 qid = TC_H_MAJ(clid);
1984
1985 if (portid != TC_H_ROOT) {
1986 u32 qid1 = TC_H_MAJ(portid);
1987
1988 if (qid && qid1) {
1989
1990 if (qid != qid1)
1991 return -EINVAL;
1992 } else if (qid1) {
1993 qid = qid1;
1994 } else if (qid == 0)
1995 qid = dev->qdisc->handle;
1996
1997
1998
1999
2000
2001
2002 if (portid)
2003 portid = TC_H_MAKE(qid, portid);
2004 } else {
2005 if (qid == 0)
2006 qid = dev->qdisc->handle;
2007 }
2008
2009
2010 q = qdisc_lookup(dev, qid);
2011 if (!q)
2012 return -ENOENT;
2013
2014
2015 cops = q->ops->cl_ops;
2016 if (cops == NULL)
2017 return -EINVAL;
2018
2019
2020 if (clid == 0) {
2021 if (portid == TC_H_ROOT)
2022 clid = qid;
2023 } else
2024 clid = TC_H_MAKE(qid, clid);
2025
2026 if (clid)
2027 cl = cops->find(q, clid);
2028
2029 if (cl == 0) {
2030 err = -ENOENT;
2031 if (n->nlmsg_type != RTM_NEWTCLASS ||
2032 !(n->nlmsg_flags & NLM_F_CREATE))
2033 goto out;
2034 } else {
2035 switch (n->nlmsg_type) {
2036 case RTM_NEWTCLASS:
2037 err = -EEXIST;
2038 if (n->nlmsg_flags & NLM_F_EXCL)
2039 goto out;
2040 break;
2041 case RTM_DELTCLASS:
2042 err = tclass_del_notify(net, cops, skb, n, q, cl);
2043
2044 tc_bind_tclass(q, portid, clid, 0);
2045 goto out;
2046 case RTM_GETTCLASS:
2047 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2048 goto out;
2049 default:
2050 err = -EINVAL;
2051 goto out;
2052 }
2053 }
2054
2055 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2056 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2057 return -EOPNOTSUPP;
2058 }
2059
2060 new_cl = cl;
2061 err = -EOPNOTSUPP;
2062 if (cops->change)
2063 err = cops->change(q, clid, portid, tca, &new_cl, extack);
2064 if (err == 0) {
2065 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2066
2067 if (cl != new_cl)
2068 tc_bind_tclass(q, portid, clid, new_cl);
2069 }
2070out:
2071 return err;
2072}
2073
2074struct qdisc_dump_args {
2075 struct qdisc_walker w;
2076 struct sk_buff *skb;
2077 struct netlink_callback *cb;
2078};
2079
2080static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2081 struct qdisc_walker *arg)
2082{
2083 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2084
2085 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2086 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2087 RTM_NEWTCLASS);
2088}
2089
2090static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2091 struct tcmsg *tcm, struct netlink_callback *cb,
2092 int *t_p, int s_t)
2093{
2094 struct qdisc_dump_args arg;
2095
2096 if (tc_qdisc_dump_ignore(q, false) ||
2097 *t_p < s_t || !q->ops->cl_ops ||
2098 (tcm->tcm_parent &&
2099 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2100 (*t_p)++;
2101 return 0;
2102 }
2103 if (*t_p > s_t)
2104 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2105 arg.w.fn = qdisc_class_dump;
2106 arg.skb = skb;
2107 arg.cb = cb;
2108 arg.w.stop = 0;
2109 arg.w.skip = cb->args[1];
2110 arg.w.count = 0;
2111 q->ops->cl_ops->walk(q, &arg.w);
2112 cb->args[1] = arg.w.count;
2113 if (arg.w.stop)
2114 return -1;
2115 (*t_p)++;
2116 return 0;
2117}
2118
2119static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2120 struct tcmsg *tcm, struct netlink_callback *cb,
2121 int *t_p, int s_t)
2122{
2123 struct Qdisc *q;
2124 int b;
2125
2126 if (!root)
2127 return 0;
2128
2129 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2130 return -1;
2131
2132 if (!qdisc_dev(root))
2133 return 0;
2134
2135 if (tcm->tcm_parent) {
2136 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2137 if (q && q != root &&
2138 tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2139 return -1;
2140 return 0;
2141 }
2142 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2143 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2144 return -1;
2145 }
2146
2147 return 0;
2148}
2149
2150static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2151{
2152 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2153 struct net *net = sock_net(skb->sk);
2154 struct netdev_queue *dev_queue;
2155 struct net_device *dev;
2156 int t, s_t;
2157
2158 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2159 return 0;
2160 dev = dev_get_by_index(net, tcm->tcm_ifindex);
2161 if (!dev)
2162 return 0;
2163
2164 s_t = cb->args[0];
2165 t = 0;
2166
2167 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
2168 goto done;
2169
2170 dev_queue = dev_ingress_queue(dev);
2171 if (dev_queue &&
2172 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2173 &t, s_t) < 0)
2174 goto done;
2175
2176done:
2177 cb->args[0] = t;
2178
2179 dev_put(dev);
2180 return skb->len;
2181}
2182
2183#ifdef CONFIG_PROC_FS
2184static int psched_show(struct seq_file *seq, void *v)
2185{
2186 seq_printf(seq, "%08x %08x %08x %08x\n",
2187 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2188 1000000,
2189 (u32)NSEC_PER_SEC / hrtimer_resolution);
2190
2191 return 0;
2192}
2193
2194static int __net_init psched_net_init(struct net *net)
2195{
2196 struct proc_dir_entry *e;
2197
2198 e = proc_create_single("psched", 0, net->proc_net, psched_show);
2199 if (e == NULL)
2200 return -ENOMEM;
2201
2202 return 0;
2203}
2204
2205static void __net_exit psched_net_exit(struct net *net)
2206{
2207 remove_proc_entry("psched", net->proc_net);
2208}
2209#else
2210static int __net_init psched_net_init(struct net *net)
2211{
2212 return 0;
2213}
2214
2215static void __net_exit psched_net_exit(struct net *net)
2216{
2217}
2218#endif
2219
2220static struct pernet_operations psched_net_ops = {
2221 .init = psched_net_init,
2222 .exit = psched_net_exit,
2223};
2224
2225static int __init pktsched_init(void)
2226{
2227 int err;
2228
2229 err = register_pernet_subsys(&psched_net_ops);
2230 if (err) {
2231 pr_err("pktsched_init: "
2232 "cannot initialize per netns operations\n");
2233 return err;
2234 }
2235
2236 register_qdisc(&pfifo_fast_ops);
2237 register_qdisc(&pfifo_qdisc_ops);
2238 register_qdisc(&bfifo_qdisc_ops);
2239 register_qdisc(&pfifo_head_drop_qdisc_ops);
2240 register_qdisc(&mq_qdisc_ops);
2241 register_qdisc(&noqueue_qdisc_ops);
2242
2243 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2244 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2245 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2246 0);
2247 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2248 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2249 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2250 0);
2251
2252 return 0;
2253}
2254
2255subsys_initcall(pktsched_init);
2256