1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/module.h>
25#include <linux/moduleparam.h>
26#include <linux/types.h>
27#include <linux/kernel.h>
28#include <linux/string.h>
29#include <linux/errno.h>
30#include <linux/skbuff.h>
31#include <linux/list.h>
32#include <linux/compiler.h>
33#include <linux/rbtree.h>
34#include <linux/workqueue.h>
35#include <linux/slab.h>
36#include <net/netlink.h>
37#include <net/sch_generic.h>
38#include <net/pkt_sched.h>
39#include <net/pkt_cls.h>
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54static int htb_hysteresis __read_mostly = 0;
55#define HTB_VER 0x30011
56
57#if HTB_VER >> 16 != TC_HTB_PROTOVER
58#error "Mismatched sch_htb.c and pkt_sch.h"
59#endif
60
61
62module_param (htb_hysteresis, int, 0640);
63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65static int htb_rate_est = 0;
66module_param(htb_rate_est, int, 0640);
67MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68
69
70enum htb_cmode {
71 HTB_CANT_SEND,
72 HTB_MAY_BORROW,
73 HTB_CAN_SEND
74};
75
76struct htb_prio {
77 union {
78 struct rb_root row;
79 struct rb_root feed;
80 };
81 struct rb_node *ptr;
82
83
84
85
86
87 u32 last_ptr_id;
88};
89
90
91
92
93
94struct htb_class {
95 struct Qdisc_class_common common;
96 struct psched_ratecfg rate;
97 struct psched_ratecfg ceil;
98 s64 buffer, cbuffer;
99 s64 mbuffer;
100 u32 prio;
101 int quantum;
102
103 struct tcf_proto __rcu *filter_list;
104 struct tcf_block *block;
105 int filter_cnt;
106
107 int level;
108 unsigned int children;
109 struct htb_class *parent;
110
111 struct net_rate_estimator __rcu *rate_est;
112
113
114
115
116 struct gnet_stats_basic_packed bstats;
117 struct tc_htb_xstats xstats;
118
119
120 s64 tokens, ctokens;
121 s64 t_c;
122
123 union {
124 struct htb_class_leaf {
125 int deficit[TC_HTB_MAXDEPTH];
126 struct Qdisc *q;
127 } leaf;
128 struct htb_class_inner {
129 struct htb_prio clprio[TC_HTB_NUMPRIO];
130 } inner;
131 };
132 s64 pq_key;
133
134 int prio_activity;
135 enum htb_cmode cmode;
136 struct rb_node pq_node;
137 struct rb_node node[TC_HTB_NUMPRIO];
138
139 unsigned int drops ____cacheline_aligned_in_smp;
140 unsigned int overlimits;
141};
142
143struct htb_level {
144 struct rb_root wait_pq;
145 struct htb_prio hprio[TC_HTB_NUMPRIO];
146};
147
148struct htb_sched {
149 struct Qdisc_class_hash clhash;
150 int defcls;
151 int rate2quantum;
152
153
154 struct tcf_proto __rcu *filter_list;
155 struct tcf_block *block;
156
157#define HTB_WARN_TOOMANYEVENTS 0x1
158 unsigned int warned;
159 int direct_qlen;
160 struct work_struct work;
161
162
163 struct qdisc_skb_head direct_queue;
164 u32 direct_pkts;
165 u32 overlimits;
166
167 struct qdisc_watchdog watchdog;
168
169 s64 now;
170
171
172 s64 near_ev_cache[TC_HTB_MAXDEPTH];
173
174 int row_mask[TC_HTB_MAXDEPTH];
175
176 struct htb_level hlevel[TC_HTB_MAXDEPTH];
177};
178
179
180static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
181{
182 struct htb_sched *q = qdisc_priv(sch);
183 struct Qdisc_class_common *clc;
184
185 clc = qdisc_class_find(&q->clhash, handle);
186 if (clc == NULL)
187 return NULL;
188 return container_of(clc, struct htb_class, common);
189}
190
191static unsigned long htb_search(struct Qdisc *sch, u32 handle)
192{
193 return (unsigned long)htb_find(handle, sch);
194}
195
196
197
198
199
200
201
202
203
204
205
206
207#define HTB_DIRECT ((struct htb_class *)-1L)
208
209static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
210 int *qerr)
211{
212 struct htb_sched *q = qdisc_priv(sch);
213 struct htb_class *cl;
214 struct tcf_result res;
215 struct tcf_proto *tcf;
216 int result;
217
218
219
220
221
222 if (skb->priority == sch->handle)
223 return HTB_DIRECT;
224 cl = htb_find(skb->priority, sch);
225 if (cl) {
226 if (cl->level == 0)
227 return cl;
228
229 tcf = rcu_dereference_bh(cl->filter_list);
230 } else {
231 tcf = rcu_dereference_bh(q->filter_list);
232 }
233
234 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
235 while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
236#ifdef CONFIG_NET_CLS_ACT
237 switch (result) {
238 case TC_ACT_QUEUED:
239 case TC_ACT_STOLEN:
240 case TC_ACT_TRAP:
241 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
242 fallthrough;
243 case TC_ACT_SHOT:
244 return NULL;
245 }
246#endif
247 cl = (void *)res.class;
248 if (!cl) {
249 if (res.classid == sch->handle)
250 return HTB_DIRECT;
251 cl = htb_find(res.classid, sch);
252 if (!cl)
253 break;
254 }
255 if (!cl->level)
256 return cl;
257
258
259 tcf = rcu_dereference_bh(cl->filter_list);
260 }
261
262 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
263 if (!cl || cl->level)
264 return HTB_DIRECT;
265 return cl;
266}
267
268
269
270
271
272
273
274static void htb_add_to_id_tree(struct rb_root *root,
275 struct htb_class *cl, int prio)
276{
277 struct rb_node **p = &root->rb_node, *parent = NULL;
278
279 while (*p) {
280 struct htb_class *c;
281 parent = *p;
282 c = rb_entry(parent, struct htb_class, node[prio]);
283
284 if (cl->common.classid > c->common.classid)
285 p = &parent->rb_right;
286 else
287 p = &parent->rb_left;
288 }
289 rb_link_node(&cl->node[prio], parent, p);
290 rb_insert_color(&cl->node[prio], root);
291}
292
293
294
295
296
297
298
299
300static void htb_add_to_wait_tree(struct htb_sched *q,
301 struct htb_class *cl, s64 delay)
302{
303 struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
304
305 cl->pq_key = q->now + delay;
306 if (cl->pq_key == q->now)
307 cl->pq_key++;
308
309
310 if (q->near_ev_cache[cl->level] > cl->pq_key)
311 q->near_ev_cache[cl->level] = cl->pq_key;
312
313 while (*p) {
314 struct htb_class *c;
315 parent = *p;
316 c = rb_entry(parent, struct htb_class, pq_node);
317 if (cl->pq_key >= c->pq_key)
318 p = &parent->rb_right;
319 else
320 p = &parent->rb_left;
321 }
322 rb_link_node(&cl->pq_node, parent, p);
323 rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
324}
325
326
327
328
329
330
331
332static inline void htb_next_rb_node(struct rb_node **n)
333{
334 *n = rb_next(*n);
335}
336
337
338
339
340
341
342
343static inline void htb_add_class_to_row(struct htb_sched *q,
344 struct htb_class *cl, int mask)
345{
346 q->row_mask[cl->level] |= mask;
347 while (mask) {
348 int prio = ffz(~mask);
349 mask &= ~(1 << prio);
350 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
351 }
352}
353
354
355static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
356{
357 if (RB_EMPTY_NODE(rb)) {
358 WARN_ON(1);
359 } else {
360 rb_erase(rb, root);
361 RB_CLEAR_NODE(rb);
362 }
363}
364
365
366
367
368
369
370
371
372static inline void htb_remove_class_from_row(struct htb_sched *q,
373 struct htb_class *cl, int mask)
374{
375 int m = 0;
376 struct htb_level *hlevel = &q->hlevel[cl->level];
377
378 while (mask) {
379 int prio = ffz(~mask);
380 struct htb_prio *hprio = &hlevel->hprio[prio];
381
382 mask &= ~(1 << prio);
383 if (hprio->ptr == cl->node + prio)
384 htb_next_rb_node(&hprio->ptr);
385
386 htb_safe_rb_erase(cl->node + prio, &hprio->row);
387 if (!hprio->row.rb_node)
388 m |= 1 << prio;
389 }
390 q->row_mask[cl->level] &= ~m;
391}
392
393
394
395
396
397
398
399
400static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
401{
402 struct htb_class *p = cl->parent;
403 long m, mask = cl->prio_activity;
404
405 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
406 m = mask;
407 while (m) {
408 int prio = ffz(~m);
409 m &= ~(1 << prio);
410
411 if (p->inner.clprio[prio].feed.rb_node)
412
413
414
415 mask &= ~(1 << prio);
416
417 htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
418 }
419 p->prio_activity |= mask;
420 cl = p;
421 p = cl->parent;
422
423 }
424 if (cl->cmode == HTB_CAN_SEND && mask)
425 htb_add_class_to_row(q, cl, mask);
426}
427
428
429
430
431
432
433
434
435static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
436{
437 struct htb_class *p = cl->parent;
438 long m, mask = cl->prio_activity;
439
440 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
441 m = mask;
442 mask = 0;
443 while (m) {
444 int prio = ffz(~m);
445 m &= ~(1 << prio);
446
447 if (p->inner.clprio[prio].ptr == cl->node + prio) {
448
449
450
451
452 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
453 p->inner.clprio[prio].ptr = NULL;
454 }
455
456 htb_safe_rb_erase(cl->node + prio,
457 &p->inner.clprio[prio].feed);
458
459 if (!p->inner.clprio[prio].feed.rb_node)
460 mask |= 1 << prio;
461 }
462
463 p->prio_activity &= ~mask;
464 cl = p;
465 p = cl->parent;
466
467 }
468 if (cl->cmode == HTB_CAN_SEND && mask)
469 htb_remove_class_from_row(q, cl, mask);
470}
471
472static inline s64 htb_lowater(const struct htb_class *cl)
473{
474 if (htb_hysteresis)
475 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
476 else
477 return 0;
478}
479static inline s64 htb_hiwater(const struct htb_class *cl)
480{
481 if (htb_hysteresis)
482 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
483 else
484 return 0;
485}
486
487
488
489
490
491
492
493
494
495
496
497
498
499static inline enum htb_cmode
500htb_class_mode(struct htb_class *cl, s64 *diff)
501{
502 s64 toks;
503
504 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
505 *diff = -toks;
506 return HTB_CANT_SEND;
507 }
508
509 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
510 return HTB_CAN_SEND;
511
512 *diff = -toks;
513 return HTB_MAY_BORROW;
514}
515
516
517
518
519
520
521
522
523
524
525static void
526htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
527{
528 enum htb_cmode new_mode = htb_class_mode(cl, diff);
529
530 if (new_mode == cl->cmode)
531 return;
532
533 if (new_mode == HTB_CANT_SEND) {
534 cl->overlimits++;
535 q->overlimits++;
536 }
537
538 if (cl->prio_activity) {
539 if (cl->cmode != HTB_CANT_SEND)
540 htb_deactivate_prios(q, cl);
541 cl->cmode = new_mode;
542 if (new_mode != HTB_CANT_SEND)
543 htb_activate_prios(q, cl);
544 } else
545 cl->cmode = new_mode;
546}
547
548
549
550
551
552
553
554
555static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
556{
557 WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
558
559 if (!cl->prio_activity) {
560 cl->prio_activity = 1 << cl->prio;
561 htb_activate_prios(q, cl);
562 }
563}
564
565
566
567
568
569
570
571static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
572{
573 WARN_ON(!cl->prio_activity);
574
575 htb_deactivate_prios(q, cl);
576 cl->prio_activity = 0;
577}
578
579static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
580 struct sk_buff **to_free)
581{
582 int ret;
583 unsigned int len = qdisc_pkt_len(skb);
584 struct htb_sched *q = qdisc_priv(sch);
585 struct htb_class *cl = htb_classify(skb, sch, &ret);
586
587 if (cl == HTB_DIRECT) {
588
589 if (q->direct_queue.qlen < q->direct_qlen) {
590 __qdisc_enqueue_tail(skb, &q->direct_queue);
591 q->direct_pkts++;
592 } else {
593 return qdisc_drop(skb, sch, to_free);
594 }
595#ifdef CONFIG_NET_CLS_ACT
596 } else if (!cl) {
597 if (ret & __NET_XMIT_BYPASS)
598 qdisc_qstats_drop(sch);
599 __qdisc_drop(skb, to_free);
600 return ret;
601#endif
602 } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
603 to_free)) != NET_XMIT_SUCCESS) {
604 if (net_xmit_drop_count(ret)) {
605 qdisc_qstats_drop(sch);
606 cl->drops++;
607 }
608 return ret;
609 } else {
610 htb_activate(q, cl);
611 }
612
613 sch->qstats.backlog += len;
614 sch->q.qlen++;
615 return NET_XMIT_SUCCESS;
616}
617
618static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
619{
620 s64 toks = diff + cl->tokens;
621
622 if (toks > cl->buffer)
623 toks = cl->buffer;
624 toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
625 if (toks <= -cl->mbuffer)
626 toks = 1 - cl->mbuffer;
627
628 cl->tokens = toks;
629}
630
631static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
632{
633 s64 toks = diff + cl->ctokens;
634
635 if (toks > cl->cbuffer)
636 toks = cl->cbuffer;
637 toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
638 if (toks <= -cl->mbuffer)
639 toks = 1 - cl->mbuffer;
640
641 cl->ctokens = toks;
642}
643
644
645
646
647
648
649
650
651
652
653
654
655static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
656 int level, struct sk_buff *skb)
657{
658 int bytes = qdisc_pkt_len(skb);
659 enum htb_cmode old_mode;
660 s64 diff;
661
662 while (cl) {
663 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
664 if (cl->level >= level) {
665 if (cl->level == level)
666 cl->xstats.lends++;
667 htb_accnt_tokens(cl, bytes, diff);
668 } else {
669 cl->xstats.borrows++;
670 cl->tokens += diff;
671 }
672 htb_accnt_ctokens(cl, bytes, diff);
673 cl->t_c = q->now;
674
675 old_mode = cl->cmode;
676 diff = 0;
677 htb_change_class_mode(q, cl, &diff);
678 if (old_mode != cl->cmode) {
679 if (old_mode != HTB_CAN_SEND)
680 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
681 if (cl->cmode != HTB_CAN_SEND)
682 htb_add_to_wait_tree(q, cl, diff);
683 }
684
685
686 if (cl->level)
687 bstats_update(&cl->bstats, skb);
688
689 cl = cl->parent;
690 }
691}
692
693
694
695
696
697
698
699
700static s64 htb_do_events(struct htb_sched *q, const int level,
701 unsigned long start)
702{
703
704
705
706
707 unsigned long stop_at = start + 2;
708 struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
709
710 while (time_before(jiffies, stop_at)) {
711 struct htb_class *cl;
712 s64 diff;
713 struct rb_node *p = rb_first(wait_pq);
714
715 if (!p)
716 return 0;
717
718 cl = rb_entry(p, struct htb_class, pq_node);
719 if (cl->pq_key > q->now)
720 return cl->pq_key;
721
722 htb_safe_rb_erase(p, wait_pq);
723 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
724 htb_change_class_mode(q, cl, &diff);
725 if (cl->cmode != HTB_CAN_SEND)
726 htb_add_to_wait_tree(q, cl, diff);
727 }
728
729
730 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
731 pr_warn("htb: too many events!\n");
732 q->warned |= HTB_WARN_TOOMANYEVENTS;
733 }
734
735 return q->now;
736}
737
738
739
740
741static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
742 u32 id)
743{
744 struct rb_node *r = NULL;
745 while (n) {
746 struct htb_class *cl =
747 rb_entry(n, struct htb_class, node[prio]);
748
749 if (id > cl->common.classid) {
750 n = n->rb_right;
751 } else if (id < cl->common.classid) {
752 r = n;
753 n = n->rb_left;
754 } else {
755 return n;
756 }
757 }
758 return r;
759}
760
761
762
763
764
765
766static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
767{
768 int i;
769 struct {
770 struct rb_node *root;
771 struct rb_node **pptr;
772 u32 *pid;
773 } stk[TC_HTB_MAXDEPTH], *sp = stk;
774
775 BUG_ON(!hprio->row.rb_node);
776 sp->root = hprio->row.rb_node;
777 sp->pptr = &hprio->ptr;
778 sp->pid = &hprio->last_ptr_id;
779
780 for (i = 0; i < 65535; i++) {
781 if (!*sp->pptr && *sp->pid) {
782
783
784
785 *sp->pptr =
786 htb_id_find_next_upper(prio, sp->root, *sp->pid);
787 }
788 *sp->pid = 0;
789
790
791 if (!*sp->pptr) {
792 *sp->pptr = sp->root;
793 while ((*sp->pptr)->rb_left)
794 *sp->pptr = (*sp->pptr)->rb_left;
795 if (sp > stk) {
796 sp--;
797 if (!*sp->pptr) {
798 WARN_ON(1);
799 return NULL;
800 }
801 htb_next_rb_node(sp->pptr);
802 }
803 } else {
804 struct htb_class *cl;
805 struct htb_prio *clp;
806
807 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
808 if (!cl->level)
809 return cl;
810 clp = &cl->inner.clprio[prio];
811 (++sp)->root = clp->feed.rb_node;
812 sp->pptr = &clp->ptr;
813 sp->pid = &clp->last_ptr_id;
814 }
815 }
816 WARN_ON(1);
817 return NULL;
818}
819
820
821
822
823static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
824 const int level)
825{
826 struct sk_buff *skb = NULL;
827 struct htb_class *cl, *start;
828 struct htb_level *hlevel = &q->hlevel[level];
829 struct htb_prio *hprio = &hlevel->hprio[prio];
830
831
832 start = cl = htb_lookup_leaf(hprio, prio);
833
834 do {
835next:
836 if (unlikely(!cl))
837 return NULL;
838
839
840
841
842
843
844 if (unlikely(cl->leaf.q->q.qlen == 0)) {
845 struct htb_class *next;
846 htb_deactivate(q, cl);
847
848
849 if ((q->row_mask[level] & (1 << prio)) == 0)
850 return NULL;
851
852 next = htb_lookup_leaf(hprio, prio);
853
854 if (cl == start)
855 start = next;
856 cl = next;
857 goto next;
858 }
859
860 skb = cl->leaf.q->dequeue(cl->leaf.q);
861 if (likely(skb != NULL))
862 break;
863
864 qdisc_warn_nonwc("htb", cl->leaf.q);
865 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
866 &q->hlevel[0].hprio[prio].ptr);
867 cl = htb_lookup_leaf(hprio, prio);
868
869 } while (cl != start);
870
871 if (likely(skb != NULL)) {
872 bstats_update(&cl->bstats, skb);
873 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
874 if (cl->leaf.deficit[level] < 0) {
875 cl->leaf.deficit[level] += cl->quantum;
876 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
877 &q->hlevel[0].hprio[prio].ptr);
878 }
879
880
881
882 if (!cl->leaf.q->q.qlen)
883 htb_deactivate(q, cl);
884 htb_charge_class(q, cl, level, skb);
885 }
886 return skb;
887}
888
889static struct sk_buff *htb_dequeue(struct Qdisc *sch)
890{
891 struct sk_buff *skb;
892 struct htb_sched *q = qdisc_priv(sch);
893 int level;
894 s64 next_event;
895 unsigned long start_at;
896
897
898 skb = __qdisc_dequeue_head(&q->direct_queue);
899 if (skb != NULL) {
900ok:
901 qdisc_bstats_update(sch, skb);
902 qdisc_qstats_backlog_dec(sch, skb);
903 sch->q.qlen--;
904 return skb;
905 }
906
907 if (!sch->q.qlen)
908 goto fin;
909 q->now = ktime_get_ns();
910 start_at = jiffies;
911
912 next_event = q->now + 5LLU * NSEC_PER_SEC;
913
914 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
915
916 int m;
917 s64 event = q->near_ev_cache[level];
918
919 if (q->now >= event) {
920 event = htb_do_events(q, level, start_at);
921 if (!event)
922 event = q->now + NSEC_PER_SEC;
923 q->near_ev_cache[level] = event;
924 }
925
926 if (next_event > event)
927 next_event = event;
928
929 m = ~q->row_mask[level];
930 while (m != (int)(-1)) {
931 int prio = ffz(m);
932
933 m |= 1 << prio;
934 skb = htb_dequeue_tree(q, prio, level);
935 if (likely(skb != NULL))
936 goto ok;
937 }
938 }
939 if (likely(next_event > q->now))
940 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
941 else
942 schedule_work(&q->work);
943fin:
944 return skb;
945}
946
947
948
949static void htb_reset(struct Qdisc *sch)
950{
951 struct htb_sched *q = qdisc_priv(sch);
952 struct htb_class *cl;
953 unsigned int i;
954
955 for (i = 0; i < q->clhash.hashsize; i++) {
956 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
957 if (cl->level)
958 memset(&cl->inner, 0, sizeof(cl->inner));
959 else {
960 if (cl->leaf.q)
961 qdisc_reset(cl->leaf.q);
962 }
963 cl->prio_activity = 0;
964 cl->cmode = HTB_CAN_SEND;
965 }
966 }
967 qdisc_watchdog_cancel(&q->watchdog);
968 __qdisc_reset_queue(&q->direct_queue);
969 sch->q.qlen = 0;
970 sch->qstats.backlog = 0;
971 memset(q->hlevel, 0, sizeof(q->hlevel));
972 memset(q->row_mask, 0, sizeof(q->row_mask));
973}
974
975static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
976 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
977 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
978 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
979 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
980 [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
981 [TCA_HTB_RATE64] = { .type = NLA_U64 },
982 [TCA_HTB_CEIL64] = { .type = NLA_U64 },
983};
984
985static void htb_work_func(struct work_struct *work)
986{
987 struct htb_sched *q = container_of(work, struct htb_sched, work);
988 struct Qdisc *sch = q->watchdog.qdisc;
989
990 rcu_read_lock();
991 __netif_schedule(qdisc_root(sch));
992 rcu_read_unlock();
993}
994
995static int htb_init(struct Qdisc *sch, struct nlattr *opt,
996 struct netlink_ext_ack *extack)
997{
998 struct htb_sched *q = qdisc_priv(sch);
999 struct nlattr *tb[TCA_HTB_MAX + 1];
1000 struct tc_htb_glob *gopt;
1001 int err;
1002
1003 qdisc_watchdog_init(&q->watchdog, sch);
1004 INIT_WORK(&q->work, htb_work_func);
1005
1006 if (!opt)
1007 return -EINVAL;
1008
1009 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1010 if (err)
1011 return err;
1012
1013 err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1014 NULL);
1015 if (err < 0)
1016 return err;
1017
1018 if (!tb[TCA_HTB_INIT])
1019 return -EINVAL;
1020
1021 gopt = nla_data(tb[TCA_HTB_INIT]);
1022 if (gopt->version != HTB_VER >> 16)
1023 return -EINVAL;
1024
1025 err = qdisc_class_hash_init(&q->clhash);
1026 if (err < 0)
1027 return err;
1028
1029 qdisc_skb_head_init(&q->direct_queue);
1030
1031 if (tb[TCA_HTB_DIRECT_QLEN])
1032 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1033 else
1034 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1035
1036 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1037 q->rate2quantum = 1;
1038 q->defcls = gopt->defcls;
1039
1040 return 0;
1041}
1042
1043static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1044{
1045 struct htb_sched *q = qdisc_priv(sch);
1046 struct nlattr *nest;
1047 struct tc_htb_glob gopt;
1048
1049 sch->qstats.overlimits = q->overlimits;
1050
1051
1052
1053
1054 gopt.direct_pkts = q->direct_pkts;
1055 gopt.version = HTB_VER;
1056 gopt.rate2quantum = q->rate2quantum;
1057 gopt.defcls = q->defcls;
1058 gopt.debug = 0;
1059
1060 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1061 if (nest == NULL)
1062 goto nla_put_failure;
1063 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1064 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1065 goto nla_put_failure;
1066
1067 return nla_nest_end(skb, nest);
1068
1069nla_put_failure:
1070 nla_nest_cancel(skb, nest);
1071 return -1;
1072}
1073
1074static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1075 struct sk_buff *skb, struct tcmsg *tcm)
1076{
1077 struct htb_class *cl = (struct htb_class *)arg;
1078 struct nlattr *nest;
1079 struct tc_htb_opt opt;
1080
1081
1082
1083
1084 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1085 tcm->tcm_handle = cl->common.classid;
1086 if (!cl->level && cl->leaf.q)
1087 tcm->tcm_info = cl->leaf.q->handle;
1088
1089 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1090 if (nest == NULL)
1091 goto nla_put_failure;
1092
1093 memset(&opt, 0, sizeof(opt));
1094
1095 psched_ratecfg_getrate(&opt.rate, &cl->rate);
1096 opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1097 psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1098 opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1099 opt.quantum = cl->quantum;
1100 opt.prio = cl->prio;
1101 opt.level = cl->level;
1102 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1103 goto nla_put_failure;
1104 if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1105 nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1106 TCA_HTB_PAD))
1107 goto nla_put_failure;
1108 if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1109 nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1110 TCA_HTB_PAD))
1111 goto nla_put_failure;
1112
1113 return nla_nest_end(skb, nest);
1114
1115nla_put_failure:
1116 nla_nest_cancel(skb, nest);
1117 return -1;
1118}
1119
1120static int
1121htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1122{
1123 struct htb_class *cl = (struct htb_class *)arg;
1124 struct gnet_stats_queue qs = {
1125 .drops = cl->drops,
1126 .overlimits = cl->overlimits,
1127 };
1128 __u32 qlen = 0;
1129
1130 if (!cl->level && cl->leaf.q)
1131 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1132
1133 cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1134 INT_MIN, INT_MAX);
1135 cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1136 INT_MIN, INT_MAX);
1137
1138 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1139 d, NULL, &cl->bstats) < 0 ||
1140 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1141 gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1142 return -1;
1143
1144 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1145}
1146
1147static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1148 struct Qdisc **old, struct netlink_ext_ack *extack)
1149{
1150 struct htb_class *cl = (struct htb_class *)arg;
1151
1152 if (cl->level)
1153 return -EINVAL;
1154 if (new == NULL &&
1155 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1156 cl->common.classid, extack)) == NULL)
1157 return -ENOBUFS;
1158
1159 *old = qdisc_replace(sch, new, &cl->leaf.q);
1160 return 0;
1161}
1162
1163static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1164{
1165 struct htb_class *cl = (struct htb_class *)arg;
1166 return !cl->level ? cl->leaf.q : NULL;
1167}
1168
1169static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1170{
1171 struct htb_class *cl = (struct htb_class *)arg;
1172
1173 htb_deactivate(qdisc_priv(sch), cl);
1174}
1175
1176static inline int htb_parent_last_child(struct htb_class *cl)
1177{
1178 if (!cl->parent)
1179
1180 return 0;
1181 if (cl->parent->children > 1)
1182
1183 return 0;
1184 return 1;
1185}
1186
1187static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1188 struct Qdisc *new_q)
1189{
1190 struct htb_class *parent = cl->parent;
1191
1192 WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1193
1194 if (parent->cmode != HTB_CAN_SEND)
1195 htb_safe_rb_erase(&parent->pq_node,
1196 &q->hlevel[parent->level].wait_pq);
1197
1198 parent->level = 0;
1199 memset(&parent->inner, 0, sizeof(parent->inner));
1200 parent->leaf.q = new_q ? new_q : &noop_qdisc;
1201 parent->tokens = parent->buffer;
1202 parent->ctokens = parent->cbuffer;
1203 parent->t_c = ktime_get_ns();
1204 parent->cmode = HTB_CAN_SEND;
1205}
1206
1207static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1208{
1209 if (!cl->level) {
1210 WARN_ON(!cl->leaf.q);
1211 qdisc_put(cl->leaf.q);
1212 }
1213 gen_kill_estimator(&cl->rate_est);
1214 tcf_block_put(cl->block);
1215 kfree(cl);
1216}
1217
1218static void htb_destroy(struct Qdisc *sch)
1219{
1220 struct htb_sched *q = qdisc_priv(sch);
1221 struct hlist_node *next;
1222 struct htb_class *cl;
1223 unsigned int i;
1224
1225 cancel_work_sync(&q->work);
1226 qdisc_watchdog_cancel(&q->watchdog);
1227
1228
1229
1230
1231
1232 tcf_block_put(q->block);
1233
1234 for (i = 0; i < q->clhash.hashsize; i++) {
1235 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1236 tcf_block_put(cl->block);
1237 cl->block = NULL;
1238 }
1239 }
1240 for (i = 0; i < q->clhash.hashsize; i++) {
1241 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1242 common.hnode)
1243 htb_destroy_class(sch, cl);
1244 }
1245 qdisc_class_hash_destroy(&q->clhash);
1246 __qdisc_reset_queue(&q->direct_queue);
1247}
1248
1249static int htb_delete(struct Qdisc *sch, unsigned long arg)
1250{
1251 struct htb_sched *q = qdisc_priv(sch);
1252 struct htb_class *cl = (struct htb_class *)arg;
1253 struct Qdisc *new_q = NULL;
1254 int last_child = 0;
1255
1256
1257
1258
1259
1260 if (cl->children || cl->filter_cnt)
1261 return -EBUSY;
1262
1263 if (!cl->level && htb_parent_last_child(cl)) {
1264 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1265 cl->parent->common.classid,
1266 NULL);
1267 last_child = 1;
1268 }
1269
1270 sch_tree_lock(sch);
1271
1272 if (!cl->level)
1273 qdisc_purge_queue(cl->leaf.q);
1274
1275
1276 qdisc_class_hash_remove(&q->clhash, &cl->common);
1277 if (cl->parent)
1278 cl->parent->children--;
1279
1280 if (cl->prio_activity)
1281 htb_deactivate(q, cl);
1282
1283 if (cl->cmode != HTB_CAN_SEND)
1284 htb_safe_rb_erase(&cl->pq_node,
1285 &q->hlevel[cl->level].wait_pq);
1286
1287 if (last_child)
1288 htb_parent_to_leaf(q, cl, new_q);
1289
1290 sch_tree_unlock(sch);
1291
1292 htb_destroy_class(sch, cl);
1293 return 0;
1294}
1295
1296static int htb_change_class(struct Qdisc *sch, u32 classid,
1297 u32 parentid, struct nlattr **tca,
1298 unsigned long *arg, struct netlink_ext_ack *extack)
1299{
1300 int err = -EINVAL;
1301 struct htb_sched *q = qdisc_priv(sch);
1302 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303 struct nlattr *opt = tca[TCA_OPTIONS];
1304 struct nlattr *tb[TCA_HTB_MAX + 1];
1305 struct Qdisc *parent_qdisc = NULL;
1306 struct tc_htb_opt *hopt;
1307 u64 rate64, ceil64;
1308 int warn = 0;
1309
1310
1311 if (!opt)
1312 goto failure;
1313
1314 err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1315 NULL);
1316 if (err < 0)
1317 goto failure;
1318
1319 err = -EINVAL;
1320 if (tb[TCA_HTB_PARMS] == NULL)
1321 goto failure;
1322
1323 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1324
1325 hopt = nla_data(tb[TCA_HTB_PARMS]);
1326 if (!hopt->rate.rate || !hopt->ceil.rate)
1327 goto failure;
1328
1329
1330 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1331 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1332 NULL));
1333
1334 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1335 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1336 NULL));
1337
1338 if (!cl) {
1339 struct Qdisc *new_q;
1340 int prio;
1341 struct {
1342 struct nlattr nla;
1343 struct gnet_estimator opt;
1344 } est = {
1345 .nla = {
1346 .nla_len = nla_attr_size(sizeof(est.opt)),
1347 .nla_type = TCA_RATE,
1348 },
1349 .opt = {
1350
1351 .interval = 2,
1352 .ewma_log = 2,
1353 },
1354 };
1355
1356
1357 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1358 htb_find(classid, sch))
1359 goto failure;
1360
1361
1362 if (parent && parent->parent && parent->parent->level < 2) {
1363 pr_err("htb: tree is too deep\n");
1364 goto failure;
1365 }
1366 err = -ENOBUFS;
1367 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1368 if (!cl)
1369 goto failure;
1370
1371 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1372 if (err) {
1373 kfree(cl);
1374 goto failure;
1375 }
1376 if (htb_rate_est || tca[TCA_RATE]) {
1377 err = gen_new_estimator(&cl->bstats, NULL,
1378 &cl->rate_est,
1379 NULL,
1380 qdisc_root_sleeping_running(sch),
1381 tca[TCA_RATE] ? : &est.nla);
1382 if (err) {
1383 tcf_block_put(cl->block);
1384 kfree(cl);
1385 goto failure;
1386 }
1387 }
1388
1389 cl->children = 0;
1390 RB_CLEAR_NODE(&cl->pq_node);
1391
1392 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1393 RB_CLEAR_NODE(&cl->node[prio]);
1394
1395
1396
1397
1398
1399 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1400 classid, NULL);
1401 sch_tree_lock(sch);
1402 if (parent && !parent->level) {
1403
1404 qdisc_purge_queue(parent->leaf.q);
1405 parent_qdisc = parent->leaf.q;
1406 if (parent->prio_activity)
1407 htb_deactivate(q, parent);
1408
1409
1410 if (parent->cmode != HTB_CAN_SEND) {
1411 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1412 parent->cmode = HTB_CAN_SEND;
1413 }
1414 parent->level = (parent->parent ? parent->parent->level
1415 : TC_HTB_MAXDEPTH) - 1;
1416 memset(&parent->inner, 0, sizeof(parent->inner));
1417 }
1418
1419 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1420
1421 cl->common.classid = classid;
1422 cl->parent = parent;
1423
1424
1425 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1426 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1427 cl->mbuffer = 60ULL * NSEC_PER_SEC;
1428 cl->t_c = ktime_get_ns();
1429 cl->cmode = HTB_CAN_SEND;
1430
1431
1432 qdisc_class_hash_insert(&q->clhash, &cl->common);
1433 if (parent)
1434 parent->children++;
1435 if (cl->leaf.q != &noop_qdisc)
1436 qdisc_hash_add(cl->leaf.q, true);
1437 } else {
1438 if (tca[TCA_RATE]) {
1439 err = gen_replace_estimator(&cl->bstats, NULL,
1440 &cl->rate_est,
1441 NULL,
1442 qdisc_root_sleeping_running(sch),
1443 tca[TCA_RATE]);
1444 if (err)
1445 return err;
1446 }
1447 sch_tree_lock(sch);
1448 }
1449
1450 rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1451
1452 ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1453
1454 psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1455 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1456
1457
1458
1459
1460 if (!cl->level) {
1461 u64 quantum = cl->rate.rate_bytes_ps;
1462
1463 do_div(quantum, q->rate2quantum);
1464 cl->quantum = min_t(u64, quantum, INT_MAX);
1465
1466 if (!hopt->quantum && cl->quantum < 1000) {
1467 warn = -1;
1468 cl->quantum = 1000;
1469 }
1470 if (!hopt->quantum && cl->quantum > 200000) {
1471 warn = 1;
1472 cl->quantum = 200000;
1473 }
1474 if (hopt->quantum)
1475 cl->quantum = hopt->quantum;
1476 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1477 cl->prio = TC_HTB_NUMPRIO - 1;
1478 }
1479
1480 cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1481 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1482
1483 sch_tree_unlock(sch);
1484 qdisc_put(parent_qdisc);
1485
1486 if (warn)
1487 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
1488 cl->common.classid, (warn == -1 ? "small" : "big"));
1489
1490 qdisc_class_hash_grow(sch, &q->clhash);
1491
1492 *arg = (unsigned long)cl;
1493 return 0;
1494
1495failure:
1496 return err;
1497}
1498
1499static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
1500 struct netlink_ext_ack *extack)
1501{
1502 struct htb_sched *q = qdisc_priv(sch);
1503 struct htb_class *cl = (struct htb_class *)arg;
1504
1505 return cl ? cl->block : q->block;
1506}
1507
1508static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1509 u32 classid)
1510{
1511 struct htb_class *cl = htb_find(classid, sch);
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522 if (cl)
1523 cl->filter_cnt++;
1524 return (unsigned long)cl;
1525}
1526
1527static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1528{
1529 struct htb_class *cl = (struct htb_class *)arg;
1530
1531 if (cl)
1532 cl->filter_cnt--;
1533}
1534
1535static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1536{
1537 struct htb_sched *q = qdisc_priv(sch);
1538 struct htb_class *cl;
1539 unsigned int i;
1540
1541 if (arg->stop)
1542 return;
1543
1544 for (i = 0; i < q->clhash.hashsize; i++) {
1545 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1546 if (arg->count < arg->skip) {
1547 arg->count++;
1548 continue;
1549 }
1550 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1551 arg->stop = 1;
1552 return;
1553 }
1554 arg->count++;
1555 }
1556 }
1557}
1558
1559static const struct Qdisc_class_ops htb_class_ops = {
1560 .graft = htb_graft,
1561 .leaf = htb_leaf,
1562 .qlen_notify = htb_qlen_notify,
1563 .find = htb_search,
1564 .change = htb_change_class,
1565 .delete = htb_delete,
1566 .walk = htb_walk,
1567 .tcf_block = htb_tcf_block,
1568 .bind_tcf = htb_bind_filter,
1569 .unbind_tcf = htb_unbind_filter,
1570 .dump = htb_dump_class,
1571 .dump_stats = htb_dump_class_stats,
1572};
1573
1574static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1575 .cl_ops = &htb_class_ops,
1576 .id = "htb",
1577 .priv_size = sizeof(struct htb_sched),
1578 .enqueue = htb_enqueue,
1579 .dequeue = htb_dequeue,
1580 .peek = qdisc_peek_dequeued,
1581 .init = htb_init,
1582 .reset = htb_reset,
1583 .destroy = htb_destroy,
1584 .dump = htb_dump,
1585 .owner = THIS_MODULE,
1586};
1587
1588static int __init htb_module_init(void)
1589{
1590 return register_qdisc(&htb_qdisc_ops);
1591}
1592static void __exit htb_module_exit(void)
1593{
1594 unregister_qdisc(&htb_qdisc_ops);
1595}
1596
1597module_init(htb_module_init)
1598module_exit(htb_module_exit)
1599MODULE_LICENSE("GPL");
1600