1
2
3
4
5
6
7
8
9#include <linux/module.h>
10#include <linux/init.h>
11#include <linux/bitops.h>
12#include <linux/errno.h>
13#include <linux/netdevice.h>
14#include <linux/pkt_sched.h>
15#include <net/sch_generic.h>
16#include <net/pkt_sched.h>
17#include <net/pkt_cls.h>
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94#define QFQ_MAX_SLOTS 32
95
96
97
98
99
100
101
102
103
104
105#define QFQ_MAX_INDEX 24
106#define QFQ_MAX_WSHIFT 10
107
108#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
109#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
110
111#define FRAC_BITS 30
112#define ONE_FP (1UL << FRAC_BITS)
113
114#define QFQ_MTU_SHIFT 16
115#define QFQ_MIN_LMAX 512
116
117#define QFQ_MAX_AGG_CLASSES 8
118
119
120
121
122
123enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
124
125struct qfq_group;
126
127struct qfq_aggregate;
128
129struct qfq_class {
130 struct Qdisc_class_common common;
131
132 unsigned int filter_cnt;
133
134 struct gnet_stats_basic_packed bstats;
135 struct gnet_stats_queue qstats;
136 struct net_rate_estimator __rcu *rate_est;
137 struct Qdisc *qdisc;
138 struct list_head alist;
139 struct qfq_aggregate *agg;
140 int deficit;
141};
142
143struct qfq_aggregate {
144 struct hlist_node next;
145 u64 S, F;
146
147
148
149
150
151 struct qfq_group *grp;
152
153
154 u32 class_weight;
155
156 int lmax;
157
158 u32 inv_w;
159 u32 budgetmax;
160 u32 initial_budget, budget;
161
162 int num_classes;
163 struct list_head active;
164
165 struct hlist_node nonfull_next;
166};
167
168struct qfq_group {
169 u64 S, F;
170 unsigned int slot_shift;
171 unsigned int index;
172 unsigned int front;
173 unsigned long full_slots;
174
175
176 struct hlist_head slots[QFQ_MAX_SLOTS];
177};
178
179struct qfq_sched {
180 struct tcf_proto __rcu *filter_list;
181 struct tcf_block *block;
182 struct Qdisc_class_hash clhash;
183
184 u64 oldV, V;
185 struct qfq_aggregate *in_serv_agg;
186 u32 wsum;
187 u32 iwsum;
188
189 unsigned long bitmaps[QFQ_MAX_STATE];
190 struct qfq_group groups[QFQ_MAX_INDEX + 1];
191 u32 min_slot_shift;
192
193 u32 max_agg_classes;
194 struct hlist_head nonfull_aggs;
195};
196
197
198
199
200
201
202
203
204enum update_reason {enqueue, requeue};
205
206static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
207{
208 struct qfq_sched *q = qdisc_priv(sch);
209 struct Qdisc_class_common *clc;
210
211 clc = qdisc_class_find(&q->clhash, classid);
212 if (clc == NULL)
213 return NULL;
214 return container_of(clc, struct qfq_class, common);
215}
216
217static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
218 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
219 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
220};
221
222
223
224
225
226
227static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
228{
229 u64 slot_size = (u64)maxlen * inv_w;
230 unsigned long size_map;
231 int index = 0;
232
233 size_map = slot_size >> min_slot_shift;
234 if (!size_map)
235 goto out;
236
237 index = __fls(size_map) + 1;
238 index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
239
240 if (index < 0)
241 index = 0;
242out:
243 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
244 (unsigned long) ONE_FP/inv_w, maxlen, index);
245
246 return index;
247}
248
249static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
250static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
251 enum update_reason);
252
253static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
254 u32 lmax, u32 weight)
255{
256 INIT_LIST_HEAD(&agg->active);
257 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
258
259 agg->lmax = lmax;
260 agg->class_weight = weight;
261}
262
263static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
264 u32 lmax, u32 weight)
265{
266 struct qfq_aggregate *agg;
267
268 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
269 if (agg->lmax == lmax && agg->class_weight == weight)
270 return agg;
271
272 return NULL;
273}
274
275
276
277static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
278 int new_num_classes)
279{
280 u32 new_agg_weight;
281
282 if (new_num_classes == q->max_agg_classes)
283 hlist_del_init(&agg->nonfull_next);
284
285 if (agg->num_classes > new_num_classes &&
286 new_num_classes == q->max_agg_classes - 1)
287 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
288
289
290
291
292
293 agg->budgetmax = new_num_classes * agg->lmax;
294 new_agg_weight = agg->class_weight * new_num_classes;
295 agg->inv_w = ONE_FP/new_agg_weight;
296
297 if (agg->grp == NULL) {
298 int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
299 q->min_slot_shift);
300 agg->grp = &q->groups[i];
301 }
302
303 q->wsum +=
304 (int) agg->class_weight * (new_num_classes - agg->num_classes);
305 q->iwsum = ONE_FP / q->wsum;
306
307 agg->num_classes = new_num_classes;
308}
309
310
311static void qfq_add_to_agg(struct qfq_sched *q,
312 struct qfq_aggregate *agg,
313 struct qfq_class *cl)
314{
315 cl->agg = agg;
316
317 qfq_update_agg(q, agg, agg->num_classes+1);
318 if (cl->qdisc->q.qlen > 0) {
319 list_add_tail(&cl->alist, &agg->active);
320 if (list_first_entry(&agg->active, struct qfq_class, alist) ==
321 cl && q->in_serv_agg != agg)
322 qfq_activate_agg(q, agg, enqueue);
323 }
324}
325
326static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
327
328static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
329{
330 hlist_del_init(&agg->nonfull_next);
331 q->wsum -= agg->class_weight;
332 if (q->wsum != 0)
333 q->iwsum = ONE_FP / q->wsum;
334
335 if (q->in_serv_agg == agg)
336 q->in_serv_agg = qfq_choose_next_agg(q);
337 kfree(agg);
338}
339
340
341static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
342{
343 struct qfq_aggregate *agg = cl->agg;
344
345
346 list_del(&cl->alist);
347 if (list_empty(&agg->active))
348 qfq_deactivate_agg(q, agg);
349}
350
351
352static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
353{
354 struct qfq_aggregate *agg = cl->agg;
355
356 cl->agg = NULL;
357 if (agg->num_classes == 1) {
358 qfq_destroy_agg(q, agg);
359 return;
360 }
361 qfq_update_agg(q, agg, agg->num_classes-1);
362}
363
364
365static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
366{
367 if (cl->qdisc->q.qlen > 0)
368 qfq_deactivate_class(q, cl);
369
370 qfq_rm_from_agg(q, cl);
371}
372
373
374static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
375 u32 lmax)
376{
377 struct qfq_sched *q = qdisc_priv(sch);
378 struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
379
380 if (new_agg == NULL) {
381 new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
382 if (new_agg == NULL)
383 return -ENOBUFS;
384 qfq_init_agg(q, new_agg, lmax, weight);
385 }
386 qfq_deact_rm_from_agg(q, cl);
387 qfq_add_to_agg(q, new_agg, cl);
388
389 return 0;
390}
391
392static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
393 struct nlattr **tca, unsigned long *arg,
394 struct netlink_ext_ack *extack)
395{
396 struct qfq_sched *q = qdisc_priv(sch);
397 struct qfq_class *cl = (struct qfq_class *)*arg;
398 bool existing = false;
399 struct nlattr *tb[TCA_QFQ_MAX + 1];
400 struct qfq_aggregate *new_agg = NULL;
401 u32 weight, lmax, inv_w;
402 int err;
403 int delta_w;
404
405 if (tca[TCA_OPTIONS] == NULL) {
406 pr_notice("qfq: no options\n");
407 return -EINVAL;
408 }
409
410 err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS],
411 qfq_policy, NULL);
412 if (err < 0)
413 return err;
414
415 if (tb[TCA_QFQ_WEIGHT]) {
416 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
417 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
418 pr_notice("qfq: invalid weight %u\n", weight);
419 return -EINVAL;
420 }
421 } else
422 weight = 1;
423
424 if (tb[TCA_QFQ_LMAX]) {
425 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
426 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
427 pr_notice("qfq: invalid max length %u\n", lmax);
428 return -EINVAL;
429 }
430 } else
431 lmax = psched_mtu(qdisc_dev(sch));
432
433 inv_w = ONE_FP / weight;
434 weight = ONE_FP / inv_w;
435
436 if (cl != NULL &&
437 lmax == cl->agg->lmax &&
438 weight == cl->agg->class_weight)
439 return 0;
440
441 delta_w = weight - (cl ? cl->agg->class_weight : 0);
442
443 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
444 pr_notice("qfq: total weight out of range (%d + %u)\n",
445 delta_w, q->wsum);
446 return -EINVAL;
447 }
448
449 if (cl != NULL) {
450 if (tca[TCA_RATE]) {
451 err = gen_replace_estimator(&cl->bstats, NULL,
452 &cl->rate_est,
453 NULL,
454 qdisc_root_sleeping_running(sch),
455 tca[TCA_RATE]);
456 if (err)
457 return err;
458 }
459 existing = true;
460 goto set_change_agg;
461 }
462
463
464 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
465 if (cl == NULL)
466 return -ENOBUFS;
467
468 cl->common.classid = classid;
469 cl->deficit = lmax;
470
471 cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
472 classid, NULL);
473 if (cl->qdisc == NULL)
474 cl->qdisc = &noop_qdisc;
475
476 if (tca[TCA_RATE]) {
477 err = gen_new_estimator(&cl->bstats, NULL,
478 &cl->rate_est,
479 NULL,
480 qdisc_root_sleeping_running(sch),
481 tca[TCA_RATE]);
482 if (err)
483 goto destroy_class;
484 }
485
486 if (cl->qdisc != &noop_qdisc)
487 qdisc_hash_add(cl->qdisc, true);
488
489set_change_agg:
490 sch_tree_lock(sch);
491 new_agg = qfq_find_agg(q, lmax, weight);
492 if (new_agg == NULL) {
493 sch_tree_unlock(sch);
494 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
495 if (new_agg == NULL) {
496 err = -ENOBUFS;
497 gen_kill_estimator(&cl->rate_est);
498 goto destroy_class;
499 }
500 sch_tree_lock(sch);
501 qfq_init_agg(q, new_agg, lmax, weight);
502 }
503 if (existing)
504 qfq_deact_rm_from_agg(q, cl);
505 else
506 qdisc_class_hash_insert(&q->clhash, &cl->common);
507 qfq_add_to_agg(q, new_agg, cl);
508 sch_tree_unlock(sch);
509 qdisc_class_hash_grow(sch, &q->clhash);
510
511 *arg = (unsigned long)cl;
512 return 0;
513
514destroy_class:
515 qdisc_put(cl->qdisc);
516 kfree(cl);
517 return err;
518}
519
520static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
521{
522 struct qfq_sched *q = qdisc_priv(sch);
523
524 qfq_rm_from_agg(q, cl);
525 gen_kill_estimator(&cl->rate_est);
526 qdisc_put(cl->qdisc);
527 kfree(cl);
528}
529
530static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
531 struct netlink_ext_ack *extack)
532{
533 struct qfq_sched *q = qdisc_priv(sch);
534 struct qfq_class *cl = (struct qfq_class *)arg;
535
536 if (cl->filter_cnt > 0)
537 return -EBUSY;
538
539 sch_tree_lock(sch);
540
541 qdisc_purge_queue(cl->qdisc);
542 qdisc_class_hash_remove(&q->clhash, &cl->common);
543
544 sch_tree_unlock(sch);
545
546 qfq_destroy_class(sch, cl);
547 return 0;
548}
549
550static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
551{
552 return (unsigned long)qfq_find_class(sch, classid);
553}
554
555static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
556 struct netlink_ext_ack *extack)
557{
558 struct qfq_sched *q = qdisc_priv(sch);
559
560 if (cl)
561 return NULL;
562
563 return q->block;
564}
565
566static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
567 u32 classid)
568{
569 struct qfq_class *cl = qfq_find_class(sch, classid);
570
571 if (cl != NULL)
572 cl->filter_cnt++;
573
574 return (unsigned long)cl;
575}
576
577static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
578{
579 struct qfq_class *cl = (struct qfq_class *)arg;
580
581 cl->filter_cnt--;
582}
583
584static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
585 struct Qdisc *new, struct Qdisc **old,
586 struct netlink_ext_ack *extack)
587{
588 struct qfq_class *cl = (struct qfq_class *)arg;
589
590 if (new == NULL) {
591 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
592 cl->common.classid, NULL);
593 if (new == NULL)
594 new = &noop_qdisc;
595 }
596
597 *old = qdisc_replace(sch, new, &cl->qdisc);
598 return 0;
599}
600
601static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
602{
603 struct qfq_class *cl = (struct qfq_class *)arg;
604
605 return cl->qdisc;
606}
607
608static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
609 struct sk_buff *skb, struct tcmsg *tcm)
610{
611 struct qfq_class *cl = (struct qfq_class *)arg;
612 struct nlattr *nest;
613
614 tcm->tcm_parent = TC_H_ROOT;
615 tcm->tcm_handle = cl->common.classid;
616 tcm->tcm_info = cl->qdisc->handle;
617
618 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
619 if (nest == NULL)
620 goto nla_put_failure;
621 if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
622 nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
623 goto nla_put_failure;
624 return nla_nest_end(skb, nest);
625
626nla_put_failure:
627 nla_nest_cancel(skb, nest);
628 return -EMSGSIZE;
629}
630
631static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
632 struct gnet_dump *d)
633{
634 struct qfq_class *cl = (struct qfq_class *)arg;
635 struct tc_qfq_stats xstats;
636
637 memset(&xstats, 0, sizeof(xstats));
638
639 xstats.weight = cl->agg->class_weight;
640 xstats.lmax = cl->agg->lmax;
641
642 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
643 d, NULL, &cl->bstats) < 0 ||
644 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
645 qdisc_qstats_copy(d, cl->qdisc) < 0)
646 return -1;
647
648 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
649}
650
651static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
652{
653 struct qfq_sched *q = qdisc_priv(sch);
654 struct qfq_class *cl;
655 unsigned int i;
656
657 if (arg->stop)
658 return;
659
660 for (i = 0; i < q->clhash.hashsize; i++) {
661 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
662 if (arg->count < arg->skip) {
663 arg->count++;
664 continue;
665 }
666 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
667 arg->stop = 1;
668 return;
669 }
670 arg->count++;
671 }
672 }
673}
674
675static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
676 int *qerr)
677{
678 struct qfq_sched *q = qdisc_priv(sch);
679 struct qfq_class *cl;
680 struct tcf_result res;
681 struct tcf_proto *fl;
682 int result;
683
684 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
685 pr_debug("qfq_classify: found %d\n", skb->priority);
686 cl = qfq_find_class(sch, skb->priority);
687 if (cl != NULL)
688 return cl;
689 }
690
691 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
692 fl = rcu_dereference_bh(q->filter_list);
693 result = tcf_classify(skb, NULL, fl, &res, false);
694 if (result >= 0) {
695#ifdef CONFIG_NET_CLS_ACT
696 switch (result) {
697 case TC_ACT_QUEUED:
698 case TC_ACT_STOLEN:
699 case TC_ACT_TRAP:
700 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
701 fallthrough;
702 case TC_ACT_SHOT:
703 return NULL;
704 }
705#endif
706 cl = (struct qfq_class *)res.class;
707 if (cl == NULL)
708 cl = qfq_find_class(sch, res.classid);
709 return cl;
710 }
711
712 return NULL;
713}
714
715
716static inline int qfq_gt(u64 a, u64 b)
717{
718 return (s64)(a - b) > 0;
719}
720
721
722static inline u64 qfq_round_down(u64 ts, unsigned int shift)
723{
724 return ts & ~((1ULL << shift) - 1);
725}
726
727
728static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
729 unsigned long bitmap)
730{
731 int index = __ffs(bitmap);
732 return &q->groups[index];
733}
734
735static inline unsigned long mask_from(unsigned long bitmap, int from)
736{
737 return bitmap & ~((1UL << from) - 1);
738}
739
740
741
742
743
744
745static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
746{
747
748 unsigned int state = qfq_gt(grp->S, q->V);
749 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
750 struct qfq_group *next;
751
752 if (mask) {
753 next = qfq_ffs(q, mask);
754 if (qfq_gt(grp->F, next->F))
755 state |= EB;
756 }
757
758 return state;
759}
760
761
762
763
764
765
766
767
768static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
769 int src, int dst)
770{
771 q->bitmaps[dst] |= q->bitmaps[src] & mask;
772 q->bitmaps[src] &= ~mask;
773}
774
775static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
776{
777 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
778 struct qfq_group *next;
779
780 if (mask) {
781 next = qfq_ffs(q, mask);
782 if (!qfq_gt(next->F, old_F))
783 return;
784 }
785
786 mask = (1UL << index) - 1;
787 qfq_move_groups(q, mask, EB, ER);
788 qfq_move_groups(q, mask, IB, IR);
789}
790
791
792
793
794
795
796
797
798
799
800
801static void qfq_make_eligible(struct qfq_sched *q)
802{
803 unsigned long vslot = q->V >> q->min_slot_shift;
804 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
805
806 if (vslot != old_vslot) {
807 unsigned long mask;
808 int last_flip_pos = fls(vslot ^ old_vslot);
809
810 if (last_flip_pos > 31)
811 mask = ~0UL;
812 else
813 mask = (1UL << last_flip_pos) - 1;
814
815 qfq_move_groups(q, mask, IR, ER);
816 qfq_move_groups(q, mask, IB, EB);
817 }
818}
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
876 u64 roundedS)
877{
878 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
879 unsigned int i;
880
881 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
882 u64 deltaS = roundedS - grp->S -
883 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
884 agg->S -= deltaS;
885 agg->F -= deltaS;
886 slot = QFQ_MAX_SLOTS - 2;
887 }
888
889 i = (grp->front + slot) % QFQ_MAX_SLOTS;
890
891 hlist_add_head(&agg->next, &grp->slots[i]);
892 __set_bit(slot, &grp->full_slots);
893}
894
895
896static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
897{
898 return hlist_entry(grp->slots[grp->front].first,
899 struct qfq_aggregate, next);
900}
901
902
903
904
905static void qfq_front_slot_remove(struct qfq_group *grp)
906{
907 struct qfq_aggregate *agg = qfq_slot_head(grp);
908
909 BUG_ON(!agg);
910 hlist_del(&agg->next);
911 if (hlist_empty(&grp->slots[grp->front]))
912 __clear_bit(0, &grp->full_slots);
913}
914
915
916
917
918
919
920static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
921{
922 unsigned int i;
923
924 pr_debug("qfq slot_scan: grp %u full %#lx\n",
925 grp->index, grp->full_slots);
926
927 if (grp->full_slots == 0)
928 return NULL;
929
930 i = __ffs(grp->full_slots);
931 if (i > 0) {
932 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
933 grp->full_slots >>= i;
934 }
935
936 return qfq_slot_head(grp);
937}
938
939
940
941
942
943
944
945
946
947
948static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
949{
950 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
951
952 grp->full_slots <<= i;
953 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
954}
955
956static void qfq_update_eligible(struct qfq_sched *q)
957{
958 struct qfq_group *grp;
959 unsigned long ineligible;
960
961 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
962 if (ineligible) {
963 if (!q->bitmaps[ER]) {
964 grp = qfq_ffs(q, ineligible);
965 if (qfq_gt(grp->S, q->V))
966 q->V = grp->S;
967 }
968 qfq_make_eligible(q);
969 }
970}
971
972
973static void agg_dequeue(struct qfq_aggregate *agg,
974 struct qfq_class *cl, unsigned int len)
975{
976 qdisc_dequeue_peeked(cl->qdisc);
977
978 cl->deficit -= (int) len;
979
980 if (cl->qdisc->q.qlen == 0)
981 list_del(&cl->alist);
982 else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
983 cl->deficit += agg->lmax;
984 list_move_tail(&cl->alist, &agg->active);
985 }
986}
987
988static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
989 struct qfq_class **cl,
990 unsigned int *len)
991{
992 struct sk_buff *skb;
993
994 *cl = list_first_entry(&agg->active, struct qfq_class, alist);
995 skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
996 if (skb == NULL)
997 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
998 else
999 *len = qdisc_pkt_len(skb);
1000
1001 return skb;
1002}
1003
1004
1005static inline void charge_actual_service(struct qfq_aggregate *agg)
1006{
1007
1008
1009
1010
1011
1012 u32 service_received = min(agg->budgetmax,
1013 agg->initial_budget - agg->budget);
1014
1015 agg->F = agg->S + (u64)service_received * agg->inv_w;
1016}
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1031{
1032 unsigned long mask;
1033 u64 limit, roundedF;
1034 int slot_shift = agg->grp->slot_shift;
1035
1036 roundedF = qfq_round_down(agg->F, slot_shift);
1037 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1038
1039 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1040
1041 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1042 if (mask) {
1043 struct qfq_group *next = qfq_ffs(q, mask);
1044 if (qfq_gt(roundedF, next->F)) {
1045 if (qfq_gt(limit, next->F))
1046 agg->S = next->F;
1047 else
1048 agg->S = limit;
1049 return;
1050 }
1051 }
1052 agg->S = q->V;
1053 } else
1054 agg->S = agg->F;
1055}
1056
1057
1058
1059
1060
1061
1062static inline void
1063qfq_update_agg_ts(struct qfq_sched *q,
1064 struct qfq_aggregate *agg, enum update_reason reason)
1065{
1066 if (reason != requeue)
1067 qfq_update_start(q, agg);
1068 else
1069 agg->S = agg->F;
1070
1071 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1072}
1073
1074static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1075
1076static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1077{
1078 struct qfq_sched *q = qdisc_priv(sch);
1079 struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
1080 struct qfq_class *cl;
1081 struct sk_buff *skb = NULL;
1082
1083 unsigned int len = 0;
1084
1085 if (in_serv_agg == NULL)
1086 return NULL;
1087
1088 if (!list_empty(&in_serv_agg->active))
1089 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1090
1091
1092
1093
1094
1095
1096 if (len == 0 || in_serv_agg->budget < len) {
1097 charge_actual_service(in_serv_agg);
1098
1099
1100 in_serv_agg->initial_budget = in_serv_agg->budget =
1101 in_serv_agg->budgetmax;
1102
1103 if (!list_empty(&in_serv_agg->active)) {
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114 qfq_update_agg_ts(q, in_serv_agg, requeue);
1115 qfq_schedule_agg(q, in_serv_agg);
1116 } else if (sch->q.qlen == 0) {
1117 q->in_serv_agg = NULL;
1118 return NULL;
1119 }
1120
1121
1122
1123
1124
1125 in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
1126 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1127 }
1128 if (!skb)
1129 return NULL;
1130
1131 qdisc_qstats_backlog_dec(sch, skb);
1132 sch->q.qlen--;
1133 qdisc_bstats_update(sch, skb);
1134
1135 agg_dequeue(in_serv_agg, cl, len);
1136
1137
1138
1139
1140 if (unlikely(in_serv_agg->budget < len))
1141 in_serv_agg->budget = 0;
1142 else
1143 in_serv_agg->budget -= len;
1144
1145 q->V += (u64)len * q->iwsum;
1146 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1147 len, (unsigned long long) in_serv_agg->F,
1148 (unsigned long long) q->V);
1149
1150 return skb;
1151}
1152
1153static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1154{
1155 struct qfq_group *grp;
1156 struct qfq_aggregate *agg, *new_front_agg;
1157 u64 old_F;
1158
1159 qfq_update_eligible(q);
1160 q->oldV = q->V;
1161
1162 if (!q->bitmaps[ER])
1163 return NULL;
1164
1165 grp = qfq_ffs(q, q->bitmaps[ER]);
1166 old_F = grp->F;
1167
1168 agg = qfq_slot_head(grp);
1169
1170
1171 qfq_front_slot_remove(grp);
1172
1173 new_front_agg = qfq_slot_scan(grp);
1174
1175 if (new_front_agg == NULL)
1176 __clear_bit(grp->index, &q->bitmaps[ER]);
1177 else {
1178 u64 roundedS = qfq_round_down(new_front_agg->S,
1179 grp->slot_shift);
1180 unsigned int s;
1181
1182 if (grp->S == roundedS)
1183 return agg;
1184 grp->S = roundedS;
1185 grp->F = roundedS + (2ULL << grp->slot_shift);
1186 __clear_bit(grp->index, &q->bitmaps[ER]);
1187 s = qfq_calc_state(q, grp);
1188 __set_bit(grp->index, &q->bitmaps[s]);
1189 }
1190
1191 qfq_unblock_groups(q, grp->index, old_F);
1192
1193 return agg;
1194}
1195
1196static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1197 struct sk_buff **to_free)
1198{
1199 unsigned int len = qdisc_pkt_len(skb), gso_segs;
1200 struct qfq_sched *q = qdisc_priv(sch);
1201 struct qfq_class *cl;
1202 struct qfq_aggregate *agg;
1203 int err = 0;
1204 bool first;
1205
1206 cl = qfq_classify(skb, sch, &err);
1207 if (cl == NULL) {
1208 if (err & __NET_XMIT_BYPASS)
1209 qdisc_qstats_drop(sch);
1210 __qdisc_drop(skb, to_free);
1211 return err;
1212 }
1213 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
1214
1215 if (unlikely(cl->agg->lmax < len)) {
1216 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
1217 cl->agg->lmax, len, cl->common.classid);
1218 err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
1219 if (err) {
1220 cl->qstats.drops++;
1221 return qdisc_drop(skb, sch, to_free);
1222 }
1223 }
1224
1225 gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
1226 first = !cl->qdisc->q.qlen;
1227 err = qdisc_enqueue(skb, cl->qdisc, to_free);
1228 if (unlikely(err != NET_XMIT_SUCCESS)) {
1229 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
1230 if (net_xmit_drop_count(err)) {
1231 cl->qstats.drops++;
1232 qdisc_qstats_drop(sch);
1233 }
1234 return err;
1235 }
1236
1237 cl->bstats.bytes += len;
1238 cl->bstats.packets += gso_segs;
1239 sch->qstats.backlog += len;
1240 ++sch->q.qlen;
1241
1242 agg = cl->agg;
1243
1244 if (!first) {
1245 if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
1246 list_first_entry(&agg->active, struct qfq_class, alist)
1247 == cl && cl->deficit < len)
1248 list_move_tail(&cl->alist, &agg->active);
1249
1250 return err;
1251 }
1252
1253
1254 cl->deficit = agg->lmax;
1255 list_add_tail(&cl->alist, &agg->active);
1256
1257 if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
1258 q->in_serv_agg == agg)
1259 return err;
1260
1261 qfq_activate_agg(q, agg, enqueue);
1262
1263 return err;
1264}
1265
1266
1267
1268
1269static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1270{
1271 struct qfq_group *grp = agg->grp;
1272 u64 roundedS;
1273 int s;
1274
1275 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286 if (grp->full_slots) {
1287 if (!qfq_gt(grp->S, agg->S))
1288 goto skip_update;
1289
1290
1291 qfq_slot_rotate(grp, roundedS);
1292
1293 __clear_bit(grp->index, &q->bitmaps[IR]);
1294 __clear_bit(grp->index, &q->bitmaps[IB]);
1295 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
1296 q->in_serv_agg == NULL)
1297 q->V = roundedS;
1298
1299 grp->S = roundedS;
1300 grp->F = roundedS + (2ULL << grp->slot_shift);
1301 s = qfq_calc_state(q, grp);
1302 __set_bit(grp->index, &q->bitmaps[s]);
1303
1304 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
1305 s, q->bitmaps[s],
1306 (unsigned long long) agg->S,
1307 (unsigned long long) agg->F,
1308 (unsigned long long) q->V);
1309
1310skip_update:
1311 qfq_slot_insert(grp, agg, roundedS);
1312}
1313
1314
1315
1316static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
1317 enum update_reason reason)
1318{
1319 agg->initial_budget = agg->budget = agg->budgetmax;
1320
1321 qfq_update_agg_ts(q, agg, reason);
1322 if (q->in_serv_agg == NULL) {
1323 q->in_serv_agg = agg;
1324
1325 q->oldV = q->V = agg->S;
1326 } else if (agg != q->in_serv_agg)
1327 qfq_schedule_agg(q, agg);
1328}
1329
1330static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
1331 struct qfq_aggregate *agg)
1332{
1333 unsigned int i, offset;
1334 u64 roundedS;
1335
1336 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1337 offset = (roundedS - grp->S) >> grp->slot_shift;
1338
1339 i = (grp->front + offset) % QFQ_MAX_SLOTS;
1340
1341 hlist_del(&agg->next);
1342 if (hlist_empty(&grp->slots[i]))
1343 __clear_bit(offset, &grp->full_slots);
1344}
1345
1346
1347
1348
1349
1350
1351
1352
1353static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1354{
1355 struct qfq_group *grp = agg->grp;
1356 unsigned long mask;
1357 u64 roundedS;
1358 int s;
1359
1360 if (agg == q->in_serv_agg) {
1361 charge_actual_service(agg);
1362 q->in_serv_agg = qfq_choose_next_agg(q);
1363 return;
1364 }
1365
1366 agg->F = agg->S;
1367 qfq_slot_remove(q, grp, agg);
1368
1369 if (!grp->full_slots) {
1370 __clear_bit(grp->index, &q->bitmaps[IR]);
1371 __clear_bit(grp->index, &q->bitmaps[EB]);
1372 __clear_bit(grp->index, &q->bitmaps[IB]);
1373
1374 if (test_bit(grp->index, &q->bitmaps[ER]) &&
1375 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
1376 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
1377 if (mask)
1378 mask = ~((1UL << __fls(mask)) - 1);
1379 else
1380 mask = ~0UL;
1381 qfq_move_groups(q, mask, EB, ER);
1382 qfq_move_groups(q, mask, IB, IR);
1383 }
1384 __clear_bit(grp->index, &q->bitmaps[ER]);
1385 } else if (hlist_empty(&grp->slots[grp->front])) {
1386 agg = qfq_slot_scan(grp);
1387 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1388 if (grp->S != roundedS) {
1389 __clear_bit(grp->index, &q->bitmaps[ER]);
1390 __clear_bit(grp->index, &q->bitmaps[IR]);
1391 __clear_bit(grp->index, &q->bitmaps[EB]);
1392 __clear_bit(grp->index, &q->bitmaps[IB]);
1393 grp->S = roundedS;
1394 grp->F = roundedS + (2ULL << grp->slot_shift);
1395 s = qfq_calc_state(q, grp);
1396 __set_bit(grp->index, &q->bitmaps[s]);
1397 }
1398 }
1399}
1400
1401static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1402{
1403 struct qfq_sched *q = qdisc_priv(sch);
1404 struct qfq_class *cl = (struct qfq_class *)arg;
1405
1406 qfq_deactivate_class(q, cl);
1407}
1408
1409static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
1410 struct netlink_ext_ack *extack)
1411{
1412 struct qfq_sched *q = qdisc_priv(sch);
1413 struct qfq_group *grp;
1414 int i, j, err;
1415 u32 max_cl_shift, maxbudg_shift, max_classes;
1416
1417 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1418 if (err)
1419 return err;
1420
1421 err = qdisc_class_hash_init(&q->clhash);
1422 if (err < 0)
1423 return err;
1424
1425 if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
1426 max_classes = QFQ_MAX_AGG_CLASSES;
1427 else
1428 max_classes = qdisc_dev(sch)->tx_queue_len + 1;
1429
1430 max_cl_shift = __fls(max_classes);
1431 q->max_agg_classes = 1<<max_cl_shift;
1432
1433
1434 maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
1435 q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
1436
1437 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1438 grp = &q->groups[i];
1439 grp->index = i;
1440 grp->slot_shift = q->min_slot_shift + i;
1441 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1442 INIT_HLIST_HEAD(&grp->slots[j]);
1443 }
1444
1445 INIT_HLIST_HEAD(&q->nonfull_aggs);
1446
1447 return 0;
1448}
1449
1450static void qfq_reset_qdisc(struct Qdisc *sch)
1451{
1452 struct qfq_sched *q = qdisc_priv(sch);
1453 struct qfq_class *cl;
1454 unsigned int i;
1455
1456 for (i = 0; i < q->clhash.hashsize; i++) {
1457 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1458 if (cl->qdisc->q.qlen > 0)
1459 qfq_deactivate_class(q, cl);
1460
1461 qdisc_reset(cl->qdisc);
1462 }
1463 }
1464 sch->qstats.backlog = 0;
1465 sch->q.qlen = 0;
1466}
1467
1468static void qfq_destroy_qdisc(struct Qdisc *sch)
1469{
1470 struct qfq_sched *q = qdisc_priv(sch);
1471 struct qfq_class *cl;
1472 struct hlist_node *next;
1473 unsigned int i;
1474
1475 tcf_block_put(q->block);
1476
1477 for (i = 0; i < q->clhash.hashsize; i++) {
1478 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1479 common.hnode) {
1480 qfq_destroy_class(sch, cl);
1481 }
1482 }
1483 qdisc_class_hash_destroy(&q->clhash);
1484}
1485
1486static const struct Qdisc_class_ops qfq_class_ops = {
1487 .change = qfq_change_class,
1488 .delete = qfq_delete_class,
1489 .find = qfq_search_class,
1490 .tcf_block = qfq_tcf_block,
1491 .bind_tcf = qfq_bind_tcf,
1492 .unbind_tcf = qfq_unbind_tcf,
1493 .graft = qfq_graft_class,
1494 .leaf = qfq_class_leaf,
1495 .qlen_notify = qfq_qlen_notify,
1496 .dump = qfq_dump_class,
1497 .dump_stats = qfq_dump_class_stats,
1498 .walk = qfq_walk,
1499};
1500
1501static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1502 .cl_ops = &qfq_class_ops,
1503 .id = "qfq",
1504 .priv_size = sizeof(struct qfq_sched),
1505 .enqueue = qfq_enqueue,
1506 .dequeue = qfq_dequeue,
1507 .peek = qdisc_peek_dequeued,
1508 .init = qfq_init_qdisc,
1509 .reset = qfq_reset_qdisc,
1510 .destroy = qfq_destroy_qdisc,
1511 .owner = THIS_MODULE,
1512};
1513
1514static int __init qfq_init(void)
1515{
1516 return register_qdisc(&qfq_qdisc_ops);
1517}
1518
1519static void __exit qfq_exit(void)
1520{
1521 unregister_qdisc(&qfq_qdisc_ops);
1522}
1523
1524module_init(qfq_init);
1525module_exit(qfq_exit);
1526MODULE_LICENSE("GPL");
1527