1
2
3
4
5
6
7
8
9#include <linux/module.h>
10#include <linux/init.h>
11#include <linux/bitops.h>
12#include <linux/errno.h>
13#include <linux/netdevice.h>
14#include <linux/pkt_sched.h>
15#include <net/sch_generic.h>
16#include <net/pkt_sched.h>
17#include <net/pkt_cls.h>
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94#define QFQ_MAX_SLOTS 32
95
96
97
98
99
100
101
102
103
104
105#define QFQ_MAX_INDEX 24
106#define QFQ_MAX_WSHIFT 10
107
108#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
109#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
110
111#define FRAC_BITS 30
112#define ONE_FP (1UL << FRAC_BITS)
113
114#define QFQ_MTU_SHIFT 16
115#define QFQ_MIN_LMAX 512
116
117#define QFQ_MAX_AGG_CLASSES 8
118
119
120
121
122
123enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
124
125struct qfq_group;
126
127struct qfq_aggregate;
128
129struct qfq_class {
130 struct Qdisc_class_common common;
131
132 unsigned int filter_cnt;
133
134 struct gnet_stats_basic_packed bstats;
135 struct gnet_stats_queue qstats;
136 struct net_rate_estimator __rcu *rate_est;
137 struct Qdisc *qdisc;
138 struct list_head alist;
139 struct qfq_aggregate *agg;
140 int deficit;
141};
142
143struct qfq_aggregate {
144 struct hlist_node next;
145 u64 S, F;
146
147
148
149
150
151 struct qfq_group *grp;
152
153
154 u32 class_weight;
155
156 int lmax;
157
158 u32 inv_w;
159 u32 budgetmax;
160 u32 initial_budget, budget;
161
162 int num_classes;
163 struct list_head active;
164
165 struct hlist_node nonfull_next;
166};
167
168struct qfq_group {
169 u64 S, F;
170 unsigned int slot_shift;
171 unsigned int index;
172 unsigned int front;
173 unsigned long full_slots;
174
175
176 struct hlist_head slots[QFQ_MAX_SLOTS];
177};
178
179struct qfq_sched {
180 struct tcf_proto __rcu *filter_list;
181 struct tcf_block *block;
182 struct Qdisc_class_hash clhash;
183
184 u64 oldV, V;
185 struct qfq_aggregate *in_serv_agg;
186 u32 wsum;
187 u32 iwsum;
188
189 unsigned long bitmaps[QFQ_MAX_STATE];
190 struct qfq_group groups[QFQ_MAX_INDEX + 1];
191 u32 min_slot_shift;
192
193 u32 max_agg_classes;
194 struct hlist_head nonfull_aggs;
195};
196
197
198
199
200
201
202
203
204enum update_reason {enqueue, requeue};
205
206static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
207{
208 struct qfq_sched *q = qdisc_priv(sch);
209 struct Qdisc_class_common *clc;
210
211 clc = qdisc_class_find(&q->clhash, classid);
212 if (clc == NULL)
213 return NULL;
214 return container_of(clc, struct qfq_class, common);
215}
216
217static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
218 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
219 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
220};
221
222
223
224
225
226
227static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
228{
229 u64 slot_size = (u64)maxlen * inv_w;
230 unsigned long size_map;
231 int index = 0;
232
233 size_map = slot_size >> min_slot_shift;
234 if (!size_map)
235 goto out;
236
237 index = __fls(size_map) + 1;
238 index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
239
240 if (index < 0)
241 index = 0;
242out:
243 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
244 (unsigned long) ONE_FP/inv_w, maxlen, index);
245
246 return index;
247}
248
249static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
250static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
251 enum update_reason);
252
253static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
254 u32 lmax, u32 weight)
255{
256 INIT_LIST_HEAD(&agg->active);
257 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
258
259 agg->lmax = lmax;
260 agg->class_weight = weight;
261}
262
263static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
264 u32 lmax, u32 weight)
265{
266 struct qfq_aggregate *agg;
267
268 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
269 if (agg->lmax == lmax && agg->class_weight == weight)
270 return agg;
271
272 return NULL;
273}
274
275
276
277static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
278 int new_num_classes)
279{
280 u32 new_agg_weight;
281
282 if (new_num_classes == q->max_agg_classes)
283 hlist_del_init(&agg->nonfull_next);
284
285 if (agg->num_classes > new_num_classes &&
286 new_num_classes == q->max_agg_classes - 1)
287 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
288
289
290
291
292
293 agg->budgetmax = new_num_classes * agg->lmax;
294 new_agg_weight = agg->class_weight * new_num_classes;
295 agg->inv_w = ONE_FP/new_agg_weight;
296
297 if (agg->grp == NULL) {
298 int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
299 q->min_slot_shift);
300 agg->grp = &q->groups[i];
301 }
302
303 q->wsum +=
304 (int) agg->class_weight * (new_num_classes - agg->num_classes);
305 q->iwsum = ONE_FP / q->wsum;
306
307 agg->num_classes = new_num_classes;
308}
309
310
311static void qfq_add_to_agg(struct qfq_sched *q,
312 struct qfq_aggregate *agg,
313 struct qfq_class *cl)
314{
315 cl->agg = agg;
316
317 qfq_update_agg(q, agg, agg->num_classes+1);
318 if (cl->qdisc->q.qlen > 0) {
319 list_add_tail(&cl->alist, &agg->active);
320 if (list_first_entry(&agg->active, struct qfq_class, alist) ==
321 cl && q->in_serv_agg != agg)
322 qfq_activate_agg(q, agg, enqueue);
323 }
324}
325
326static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
327
328static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
329{
330 hlist_del_init(&agg->nonfull_next);
331 q->wsum -= agg->class_weight;
332 if (q->wsum != 0)
333 q->iwsum = ONE_FP / q->wsum;
334
335 if (q->in_serv_agg == agg)
336 q->in_serv_agg = qfq_choose_next_agg(q);
337 kfree(agg);
338}
339
340
341static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
342{
343 struct qfq_aggregate *agg = cl->agg;
344
345
346 list_del(&cl->alist);
347 if (list_empty(&agg->active))
348 qfq_deactivate_agg(q, agg);
349}
350
351
352static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
353{
354 struct qfq_aggregate *agg = cl->agg;
355
356 cl->agg = NULL;
357 if (agg->num_classes == 1) {
358 qfq_destroy_agg(q, agg);
359 return;
360 }
361 qfq_update_agg(q, agg, agg->num_classes-1);
362}
363
364
365static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
366{
367 if (cl->qdisc->q.qlen > 0)
368 qfq_deactivate_class(q, cl);
369
370 qfq_rm_from_agg(q, cl);
371}
372
373
374static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
375 u32 lmax)
376{
377 struct qfq_sched *q = qdisc_priv(sch);
378 struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
379
380 if (new_agg == NULL) {
381 new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
382 if (new_agg == NULL)
383 return -ENOBUFS;
384 qfq_init_agg(q, new_agg, lmax, weight);
385 }
386 qfq_deact_rm_from_agg(q, cl);
387 qfq_add_to_agg(q, new_agg, cl);
388
389 return 0;
390}
391
392static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
393 struct nlattr **tca, unsigned long *arg,
394 struct netlink_ext_ack *extack)
395{
396 struct qfq_sched *q = qdisc_priv(sch);
397 struct qfq_class *cl = (struct qfq_class *)*arg;
398 bool existing = false;
399 struct nlattr *tb[TCA_QFQ_MAX + 1];
400 struct qfq_aggregate *new_agg = NULL;
401 u32 weight, lmax, inv_w;
402 int err;
403 int delta_w;
404
405 if (tca[TCA_OPTIONS] == NULL) {
406 pr_notice("qfq: no options\n");
407 return -EINVAL;
408 }
409
410 err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS],
411 qfq_policy, NULL);
412 if (err < 0)
413 return err;
414
415 if (tb[TCA_QFQ_WEIGHT]) {
416 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
417 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
418 pr_notice("qfq: invalid weight %u\n", weight);
419 return -EINVAL;
420 }
421 } else
422 weight = 1;
423
424 if (tb[TCA_QFQ_LMAX]) {
425 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
426 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
427 pr_notice("qfq: invalid max length %u\n", lmax);
428 return -EINVAL;
429 }
430 } else
431 lmax = psched_mtu(qdisc_dev(sch));
432
433 inv_w = ONE_FP / weight;
434 weight = ONE_FP / inv_w;
435
436 if (cl != NULL &&
437 lmax == cl->agg->lmax &&
438 weight == cl->agg->class_weight)
439 return 0;
440
441 delta_w = weight - (cl ? cl->agg->class_weight : 0);
442
443 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
444 pr_notice("qfq: total weight out of range (%d + %u)\n",
445 delta_w, q->wsum);
446 return -EINVAL;
447 }
448
449 if (cl != NULL) {
450 if (tca[TCA_RATE]) {
451 err = gen_replace_estimator(&cl->bstats, NULL,
452 &cl->rate_est,
453 NULL,
454 qdisc_root_sleeping_running(sch),
455 tca[TCA_RATE]);
456 if (err)
457 return err;
458 }
459 existing = true;
460 goto set_change_agg;
461 }
462
463
464 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
465 if (cl == NULL)
466 return -ENOBUFS;
467
468 cl->common.classid = classid;
469 cl->deficit = lmax;
470
471 cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
472 classid, NULL);
473 if (cl->qdisc == NULL)
474 cl->qdisc = &noop_qdisc;
475
476 if (tca[TCA_RATE]) {
477 err = gen_new_estimator(&cl->bstats, NULL,
478 &cl->rate_est,
479 NULL,
480 qdisc_root_sleeping_running(sch),
481 tca[TCA_RATE]);
482 if (err)
483 goto destroy_class;
484 }
485
486 if (cl->qdisc != &noop_qdisc)
487 qdisc_hash_add(cl->qdisc, true);
488 sch_tree_lock(sch);
489 qdisc_class_hash_insert(&q->clhash, &cl->common);
490 sch_tree_unlock(sch);
491
492 qdisc_class_hash_grow(sch, &q->clhash);
493
494set_change_agg:
495 sch_tree_lock(sch);
496 new_agg = qfq_find_agg(q, lmax, weight);
497 if (new_agg == NULL) {
498 sch_tree_unlock(sch);
499 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
500 if (new_agg == NULL) {
501 err = -ENOBUFS;
502 gen_kill_estimator(&cl->rate_est);
503 goto destroy_class;
504 }
505 sch_tree_lock(sch);
506 qfq_init_agg(q, new_agg, lmax, weight);
507 }
508 if (existing)
509 qfq_deact_rm_from_agg(q, cl);
510 qfq_add_to_agg(q, new_agg, cl);
511 sch_tree_unlock(sch);
512
513 *arg = (unsigned long)cl;
514 return 0;
515
516destroy_class:
517 qdisc_put(cl->qdisc);
518 kfree(cl);
519 return err;
520}
521
522static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
523{
524 struct qfq_sched *q = qdisc_priv(sch);
525
526 qfq_rm_from_agg(q, cl);
527 gen_kill_estimator(&cl->rate_est);
528 qdisc_put(cl->qdisc);
529 kfree(cl);
530}
531
532static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
533 struct netlink_ext_ack *extack)
534{
535 struct qfq_sched *q = qdisc_priv(sch);
536 struct qfq_class *cl = (struct qfq_class *)arg;
537
538 if (cl->filter_cnt > 0)
539 return -EBUSY;
540
541 sch_tree_lock(sch);
542
543 qdisc_purge_queue(cl->qdisc);
544 qdisc_class_hash_remove(&q->clhash, &cl->common);
545
546 sch_tree_unlock(sch);
547
548 qfq_destroy_class(sch, cl);
549 return 0;
550}
551
552static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
553{
554 return (unsigned long)qfq_find_class(sch, classid);
555}
556
557static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
558 struct netlink_ext_ack *extack)
559{
560 struct qfq_sched *q = qdisc_priv(sch);
561
562 if (cl)
563 return NULL;
564
565 return q->block;
566}
567
568static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
569 u32 classid)
570{
571 struct qfq_class *cl = qfq_find_class(sch, classid);
572
573 if (cl != NULL)
574 cl->filter_cnt++;
575
576 return (unsigned long)cl;
577}
578
579static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
580{
581 struct qfq_class *cl = (struct qfq_class *)arg;
582
583 cl->filter_cnt--;
584}
585
586static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
587 struct Qdisc *new, struct Qdisc **old,
588 struct netlink_ext_ack *extack)
589{
590 struct qfq_class *cl = (struct qfq_class *)arg;
591
592 if (new == NULL) {
593 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
594 cl->common.classid, NULL);
595 if (new == NULL)
596 new = &noop_qdisc;
597 }
598
599 *old = qdisc_replace(sch, new, &cl->qdisc);
600 return 0;
601}
602
603static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
604{
605 struct qfq_class *cl = (struct qfq_class *)arg;
606
607 return cl->qdisc;
608}
609
610static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
611 struct sk_buff *skb, struct tcmsg *tcm)
612{
613 struct qfq_class *cl = (struct qfq_class *)arg;
614 struct nlattr *nest;
615
616 tcm->tcm_parent = TC_H_ROOT;
617 tcm->tcm_handle = cl->common.classid;
618 tcm->tcm_info = cl->qdisc->handle;
619
620 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
621 if (nest == NULL)
622 goto nla_put_failure;
623 if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
624 nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
625 goto nla_put_failure;
626 return nla_nest_end(skb, nest);
627
628nla_put_failure:
629 nla_nest_cancel(skb, nest);
630 return -EMSGSIZE;
631}
632
633static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
634 struct gnet_dump *d)
635{
636 struct qfq_class *cl = (struct qfq_class *)arg;
637 struct tc_qfq_stats xstats;
638
639 memset(&xstats, 0, sizeof(xstats));
640
641 xstats.weight = cl->agg->class_weight;
642 xstats.lmax = cl->agg->lmax;
643
644 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
645 d, NULL, &cl->bstats) < 0 ||
646 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
647 qdisc_qstats_copy(d, cl->qdisc) < 0)
648 return -1;
649
650 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
651}
652
653static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
654{
655 struct qfq_sched *q = qdisc_priv(sch);
656 struct qfq_class *cl;
657 unsigned int i;
658
659 if (arg->stop)
660 return;
661
662 for (i = 0; i < q->clhash.hashsize; i++) {
663 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
664 if (arg->count < arg->skip) {
665 arg->count++;
666 continue;
667 }
668 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
669 arg->stop = 1;
670 return;
671 }
672 arg->count++;
673 }
674 }
675}
676
677static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
678 int *qerr)
679{
680 struct qfq_sched *q = qdisc_priv(sch);
681 struct qfq_class *cl;
682 struct tcf_result res;
683 struct tcf_proto *fl;
684 int result;
685
686 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
687 pr_debug("qfq_classify: found %d\n", skb->priority);
688 cl = qfq_find_class(sch, skb->priority);
689 if (cl != NULL)
690 return cl;
691 }
692
693 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
694 fl = rcu_dereference_bh(q->filter_list);
695 result = tcf_classify(skb, fl, &res, false);
696 if (result >= 0) {
697#ifdef CONFIG_NET_CLS_ACT
698 switch (result) {
699 case TC_ACT_QUEUED:
700 case TC_ACT_STOLEN:
701 case TC_ACT_TRAP:
702 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
703 fallthrough;
704 case TC_ACT_SHOT:
705 return NULL;
706 }
707#endif
708 cl = (struct qfq_class *)res.class;
709 if (cl == NULL)
710 cl = qfq_find_class(sch, res.classid);
711 return cl;
712 }
713
714 return NULL;
715}
716
717
718static inline int qfq_gt(u64 a, u64 b)
719{
720 return (s64)(a - b) > 0;
721}
722
723
724static inline u64 qfq_round_down(u64 ts, unsigned int shift)
725{
726 return ts & ~((1ULL << shift) - 1);
727}
728
729
730static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
731 unsigned long bitmap)
732{
733 int index = __ffs(bitmap);
734 return &q->groups[index];
735}
736
737static inline unsigned long mask_from(unsigned long bitmap, int from)
738{
739 return bitmap & ~((1UL << from) - 1);
740}
741
742
743
744
745
746
747static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
748{
749
750 unsigned int state = qfq_gt(grp->S, q->V);
751 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
752 struct qfq_group *next;
753
754 if (mask) {
755 next = qfq_ffs(q, mask);
756 if (qfq_gt(grp->F, next->F))
757 state |= EB;
758 }
759
760 return state;
761}
762
763
764
765
766
767
768
769
770static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
771 int src, int dst)
772{
773 q->bitmaps[dst] |= q->bitmaps[src] & mask;
774 q->bitmaps[src] &= ~mask;
775}
776
777static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
778{
779 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
780 struct qfq_group *next;
781
782 if (mask) {
783 next = qfq_ffs(q, mask);
784 if (!qfq_gt(next->F, old_F))
785 return;
786 }
787
788 mask = (1UL << index) - 1;
789 qfq_move_groups(q, mask, EB, ER);
790 qfq_move_groups(q, mask, IB, IR);
791}
792
793
794
795
796
797
798
799
800
801
802
803static void qfq_make_eligible(struct qfq_sched *q)
804{
805 unsigned long vslot = q->V >> q->min_slot_shift;
806 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
807
808 if (vslot != old_vslot) {
809 unsigned long mask;
810 int last_flip_pos = fls(vslot ^ old_vslot);
811
812 if (last_flip_pos > 31)
813 mask = ~0UL;
814 else
815 mask = (1UL << last_flip_pos) - 1;
816
817 qfq_move_groups(q, mask, IR, ER);
818 qfq_move_groups(q, mask, IB, EB);
819 }
820}
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
878 u64 roundedS)
879{
880 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
881 unsigned int i;
882
883 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
884 u64 deltaS = roundedS - grp->S -
885 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
886 agg->S -= deltaS;
887 agg->F -= deltaS;
888 slot = QFQ_MAX_SLOTS - 2;
889 }
890
891 i = (grp->front + slot) % QFQ_MAX_SLOTS;
892
893 hlist_add_head(&agg->next, &grp->slots[i]);
894 __set_bit(slot, &grp->full_slots);
895}
896
897
898static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
899{
900 return hlist_entry(grp->slots[grp->front].first,
901 struct qfq_aggregate, next);
902}
903
904
905
906
907static void qfq_front_slot_remove(struct qfq_group *grp)
908{
909 struct qfq_aggregate *agg = qfq_slot_head(grp);
910
911 BUG_ON(!agg);
912 hlist_del(&agg->next);
913 if (hlist_empty(&grp->slots[grp->front]))
914 __clear_bit(0, &grp->full_slots);
915}
916
917
918
919
920
921
922static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
923{
924 unsigned int i;
925
926 pr_debug("qfq slot_scan: grp %u full %#lx\n",
927 grp->index, grp->full_slots);
928
929 if (grp->full_slots == 0)
930 return NULL;
931
932 i = __ffs(grp->full_slots);
933 if (i > 0) {
934 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
935 grp->full_slots >>= i;
936 }
937
938 return qfq_slot_head(grp);
939}
940
941
942
943
944
945
946
947
948
949
950static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
951{
952 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
953
954 grp->full_slots <<= i;
955 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
956}
957
958static void qfq_update_eligible(struct qfq_sched *q)
959{
960 struct qfq_group *grp;
961 unsigned long ineligible;
962
963 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
964 if (ineligible) {
965 if (!q->bitmaps[ER]) {
966 grp = qfq_ffs(q, ineligible);
967 if (qfq_gt(grp->S, q->V))
968 q->V = grp->S;
969 }
970 qfq_make_eligible(q);
971 }
972}
973
974
975static void agg_dequeue(struct qfq_aggregate *agg,
976 struct qfq_class *cl, unsigned int len)
977{
978 qdisc_dequeue_peeked(cl->qdisc);
979
980 cl->deficit -= (int) len;
981
982 if (cl->qdisc->q.qlen == 0)
983 list_del(&cl->alist);
984 else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
985 cl->deficit += agg->lmax;
986 list_move_tail(&cl->alist, &agg->active);
987 }
988}
989
990static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
991 struct qfq_class **cl,
992 unsigned int *len)
993{
994 struct sk_buff *skb;
995
996 *cl = list_first_entry(&agg->active, struct qfq_class, alist);
997 skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
998 if (skb == NULL)
999 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
1000 else
1001 *len = qdisc_pkt_len(skb);
1002
1003 return skb;
1004}
1005
1006
1007static inline void charge_actual_service(struct qfq_aggregate *agg)
1008{
1009
1010
1011
1012
1013
1014 u32 service_received = min(agg->budgetmax,
1015 agg->initial_budget - agg->budget);
1016
1017 agg->F = agg->S + (u64)service_received * agg->inv_w;
1018}
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1033{
1034 unsigned long mask;
1035 u64 limit, roundedF;
1036 int slot_shift = agg->grp->slot_shift;
1037
1038 roundedF = qfq_round_down(agg->F, slot_shift);
1039 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1040
1041 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1042
1043 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1044 if (mask) {
1045 struct qfq_group *next = qfq_ffs(q, mask);
1046 if (qfq_gt(roundedF, next->F)) {
1047 if (qfq_gt(limit, next->F))
1048 agg->S = next->F;
1049 else
1050 agg->S = limit;
1051 return;
1052 }
1053 }
1054 agg->S = q->V;
1055 } else
1056 agg->S = agg->F;
1057}
1058
1059
1060
1061
1062
1063
1064static inline void
1065qfq_update_agg_ts(struct qfq_sched *q,
1066 struct qfq_aggregate *agg, enum update_reason reason)
1067{
1068 if (reason != requeue)
1069 qfq_update_start(q, agg);
1070 else
1071 agg->S = agg->F;
1072
1073 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1074}
1075
1076static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1077
1078static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1079{
1080 struct qfq_sched *q = qdisc_priv(sch);
1081 struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
1082 struct qfq_class *cl;
1083 struct sk_buff *skb = NULL;
1084
1085 unsigned int len = 0;
1086
1087 if (in_serv_agg == NULL)
1088 return NULL;
1089
1090 if (!list_empty(&in_serv_agg->active))
1091 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1092
1093
1094
1095
1096
1097
1098 if (len == 0 || in_serv_agg->budget < len) {
1099 charge_actual_service(in_serv_agg);
1100
1101
1102 in_serv_agg->initial_budget = in_serv_agg->budget =
1103 in_serv_agg->budgetmax;
1104
1105 if (!list_empty(&in_serv_agg->active)) {
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116 qfq_update_agg_ts(q, in_serv_agg, requeue);
1117 qfq_schedule_agg(q, in_serv_agg);
1118 } else if (sch->q.qlen == 0) {
1119 q->in_serv_agg = NULL;
1120 return NULL;
1121 }
1122
1123
1124
1125
1126
1127 in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
1128 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1129 }
1130 if (!skb)
1131 return NULL;
1132
1133 qdisc_qstats_backlog_dec(sch, skb);
1134 sch->q.qlen--;
1135 qdisc_bstats_update(sch, skb);
1136
1137 agg_dequeue(in_serv_agg, cl, len);
1138
1139
1140
1141
1142 if (unlikely(in_serv_agg->budget < len))
1143 in_serv_agg->budget = 0;
1144 else
1145 in_serv_agg->budget -= len;
1146
1147 q->V += (u64)len * q->iwsum;
1148 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1149 len, (unsigned long long) in_serv_agg->F,
1150 (unsigned long long) q->V);
1151
1152 return skb;
1153}
1154
1155static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1156{
1157 struct qfq_group *grp;
1158 struct qfq_aggregate *agg, *new_front_agg;
1159 u64 old_F;
1160
1161 qfq_update_eligible(q);
1162 q->oldV = q->V;
1163
1164 if (!q->bitmaps[ER])
1165 return NULL;
1166
1167 grp = qfq_ffs(q, q->bitmaps[ER]);
1168 old_F = grp->F;
1169
1170 agg = qfq_slot_head(grp);
1171
1172
1173 qfq_front_slot_remove(grp);
1174
1175 new_front_agg = qfq_slot_scan(grp);
1176
1177 if (new_front_agg == NULL)
1178 __clear_bit(grp->index, &q->bitmaps[ER]);
1179 else {
1180 u64 roundedS = qfq_round_down(new_front_agg->S,
1181 grp->slot_shift);
1182 unsigned int s;
1183
1184 if (grp->S == roundedS)
1185 return agg;
1186 grp->S = roundedS;
1187 grp->F = roundedS + (2ULL << grp->slot_shift);
1188 __clear_bit(grp->index, &q->bitmaps[ER]);
1189 s = qfq_calc_state(q, grp);
1190 __set_bit(grp->index, &q->bitmaps[s]);
1191 }
1192
1193 qfq_unblock_groups(q, grp->index, old_F);
1194
1195 return agg;
1196}
1197
1198static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1199 struct sk_buff **to_free)
1200{
1201 unsigned int len = qdisc_pkt_len(skb), gso_segs;
1202 struct qfq_sched *q = qdisc_priv(sch);
1203 struct qfq_class *cl;
1204 struct qfq_aggregate *agg;
1205 int err = 0;
1206 bool first;
1207
1208 cl = qfq_classify(skb, sch, &err);
1209 if (cl == NULL) {
1210 if (err & __NET_XMIT_BYPASS)
1211 qdisc_qstats_drop(sch);
1212 __qdisc_drop(skb, to_free);
1213 return err;
1214 }
1215 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
1216
1217 if (unlikely(cl->agg->lmax < len)) {
1218 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
1219 cl->agg->lmax, len, cl->common.classid);
1220 err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
1221 if (err) {
1222 cl->qstats.drops++;
1223 return qdisc_drop(skb, sch, to_free);
1224 }
1225 }
1226
1227 gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
1228 first = !cl->qdisc->q.qlen;
1229 err = qdisc_enqueue(skb, cl->qdisc, to_free);
1230 if (unlikely(err != NET_XMIT_SUCCESS)) {
1231 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
1232 if (net_xmit_drop_count(err)) {
1233 cl->qstats.drops++;
1234 qdisc_qstats_drop(sch);
1235 }
1236 return err;
1237 }
1238
1239 cl->bstats.bytes += len;
1240 cl->bstats.packets += gso_segs;
1241 sch->qstats.backlog += len;
1242 ++sch->q.qlen;
1243
1244 agg = cl->agg;
1245
1246 if (!first) {
1247 if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
1248 list_first_entry(&agg->active, struct qfq_class, alist)
1249 == cl && cl->deficit < len)
1250 list_move_tail(&cl->alist, &agg->active);
1251
1252 return err;
1253 }
1254
1255
1256 cl->deficit = agg->lmax;
1257 list_add_tail(&cl->alist, &agg->active);
1258
1259 if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
1260 q->in_serv_agg == agg)
1261 return err;
1262
1263 qfq_activate_agg(q, agg, enqueue);
1264
1265 return err;
1266}
1267
1268
1269
1270
1271static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1272{
1273 struct qfq_group *grp = agg->grp;
1274 u64 roundedS;
1275 int s;
1276
1277 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288 if (grp->full_slots) {
1289 if (!qfq_gt(grp->S, agg->S))
1290 goto skip_update;
1291
1292
1293 qfq_slot_rotate(grp, roundedS);
1294
1295 __clear_bit(grp->index, &q->bitmaps[IR]);
1296 __clear_bit(grp->index, &q->bitmaps[IB]);
1297 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
1298 q->in_serv_agg == NULL)
1299 q->V = roundedS;
1300
1301 grp->S = roundedS;
1302 grp->F = roundedS + (2ULL << grp->slot_shift);
1303 s = qfq_calc_state(q, grp);
1304 __set_bit(grp->index, &q->bitmaps[s]);
1305
1306 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
1307 s, q->bitmaps[s],
1308 (unsigned long long) agg->S,
1309 (unsigned long long) agg->F,
1310 (unsigned long long) q->V);
1311
1312skip_update:
1313 qfq_slot_insert(grp, agg, roundedS);
1314}
1315
1316
1317
1318static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
1319 enum update_reason reason)
1320{
1321 agg->initial_budget = agg->budget = agg->budgetmax;
1322
1323 qfq_update_agg_ts(q, agg, reason);
1324 if (q->in_serv_agg == NULL) {
1325 q->in_serv_agg = agg;
1326
1327 q->oldV = q->V = agg->S;
1328 } else if (agg != q->in_serv_agg)
1329 qfq_schedule_agg(q, agg);
1330}
1331
1332static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
1333 struct qfq_aggregate *agg)
1334{
1335 unsigned int i, offset;
1336 u64 roundedS;
1337
1338 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1339 offset = (roundedS - grp->S) >> grp->slot_shift;
1340
1341 i = (grp->front + offset) % QFQ_MAX_SLOTS;
1342
1343 hlist_del(&agg->next);
1344 if (hlist_empty(&grp->slots[i]))
1345 __clear_bit(offset, &grp->full_slots);
1346}
1347
1348
1349
1350
1351
1352
1353
1354
1355static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1356{
1357 struct qfq_group *grp = agg->grp;
1358 unsigned long mask;
1359 u64 roundedS;
1360 int s;
1361
1362 if (agg == q->in_serv_agg) {
1363 charge_actual_service(agg);
1364 q->in_serv_agg = qfq_choose_next_agg(q);
1365 return;
1366 }
1367
1368 agg->F = agg->S;
1369 qfq_slot_remove(q, grp, agg);
1370
1371 if (!grp->full_slots) {
1372 __clear_bit(grp->index, &q->bitmaps[IR]);
1373 __clear_bit(grp->index, &q->bitmaps[EB]);
1374 __clear_bit(grp->index, &q->bitmaps[IB]);
1375
1376 if (test_bit(grp->index, &q->bitmaps[ER]) &&
1377 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
1378 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
1379 if (mask)
1380 mask = ~((1UL << __fls(mask)) - 1);
1381 else
1382 mask = ~0UL;
1383 qfq_move_groups(q, mask, EB, ER);
1384 qfq_move_groups(q, mask, IB, IR);
1385 }
1386 __clear_bit(grp->index, &q->bitmaps[ER]);
1387 } else if (hlist_empty(&grp->slots[grp->front])) {
1388 agg = qfq_slot_scan(grp);
1389 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1390 if (grp->S != roundedS) {
1391 __clear_bit(grp->index, &q->bitmaps[ER]);
1392 __clear_bit(grp->index, &q->bitmaps[IR]);
1393 __clear_bit(grp->index, &q->bitmaps[EB]);
1394 __clear_bit(grp->index, &q->bitmaps[IB]);
1395 grp->S = roundedS;
1396 grp->F = roundedS + (2ULL << grp->slot_shift);
1397 s = qfq_calc_state(q, grp);
1398 __set_bit(grp->index, &q->bitmaps[s]);
1399 }
1400 }
1401}
1402
1403static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1404{
1405 struct qfq_sched *q = qdisc_priv(sch);
1406 struct qfq_class *cl = (struct qfq_class *)arg;
1407
1408 qfq_deactivate_class(q, cl);
1409}
1410
1411static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
1412 struct netlink_ext_ack *extack)
1413{
1414 struct qfq_sched *q = qdisc_priv(sch);
1415 struct qfq_group *grp;
1416 int i, j, err;
1417 u32 max_cl_shift, maxbudg_shift, max_classes;
1418
1419 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1420 if (err)
1421 return err;
1422
1423 err = qdisc_class_hash_init(&q->clhash);
1424 if (err < 0)
1425 return err;
1426
1427 if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
1428 max_classes = QFQ_MAX_AGG_CLASSES;
1429 else
1430 max_classes = qdisc_dev(sch)->tx_queue_len + 1;
1431
1432 max_cl_shift = __fls(max_classes);
1433 q->max_agg_classes = 1<<max_cl_shift;
1434
1435
1436 maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
1437 q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
1438
1439 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1440 grp = &q->groups[i];
1441 grp->index = i;
1442 grp->slot_shift = q->min_slot_shift + i;
1443 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1444 INIT_HLIST_HEAD(&grp->slots[j]);
1445 }
1446
1447 INIT_HLIST_HEAD(&q->nonfull_aggs);
1448
1449 return 0;
1450}
1451
1452static void qfq_reset_qdisc(struct Qdisc *sch)
1453{
1454 struct qfq_sched *q = qdisc_priv(sch);
1455 struct qfq_class *cl;
1456 unsigned int i;
1457
1458 for (i = 0; i < q->clhash.hashsize; i++) {
1459 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1460 if (cl->qdisc->q.qlen > 0)
1461 qfq_deactivate_class(q, cl);
1462
1463 qdisc_reset(cl->qdisc);
1464 }
1465 }
1466 sch->qstats.backlog = 0;
1467 sch->q.qlen = 0;
1468}
1469
1470static void qfq_destroy_qdisc(struct Qdisc *sch)
1471{
1472 struct qfq_sched *q = qdisc_priv(sch);
1473 struct qfq_class *cl;
1474 struct hlist_node *next;
1475 unsigned int i;
1476
1477 tcf_block_put(q->block);
1478
1479 for (i = 0; i < q->clhash.hashsize; i++) {
1480 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1481 common.hnode) {
1482 qfq_destroy_class(sch, cl);
1483 }
1484 }
1485 qdisc_class_hash_destroy(&q->clhash);
1486}
1487
1488static const struct Qdisc_class_ops qfq_class_ops = {
1489 .change = qfq_change_class,
1490 .delete = qfq_delete_class,
1491 .find = qfq_search_class,
1492 .tcf_block = qfq_tcf_block,
1493 .bind_tcf = qfq_bind_tcf,
1494 .unbind_tcf = qfq_unbind_tcf,
1495 .graft = qfq_graft_class,
1496 .leaf = qfq_class_leaf,
1497 .qlen_notify = qfq_qlen_notify,
1498 .dump = qfq_dump_class,
1499 .dump_stats = qfq_dump_class_stats,
1500 .walk = qfq_walk,
1501};
1502
1503static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1504 .cl_ops = &qfq_class_ops,
1505 .id = "qfq",
1506 .priv_size = sizeof(struct qfq_sched),
1507 .enqueue = qfq_enqueue,
1508 .dequeue = qfq_dequeue,
1509 .peek = qdisc_peek_dequeued,
1510 .init = qfq_init_qdisc,
1511 .reset = qfq_reset_qdisc,
1512 .destroy = qfq_destroy_qdisc,
1513 .owner = THIS_MODULE,
1514};
1515
1516static int __init qfq_init(void)
1517{
1518 return register_qdisc(&qfq_qdisc_ops);
1519}
1520
1521static void __exit qfq_exit(void)
1522{
1523 unregister_qdisc(&qfq_qdisc_ops);
1524}
1525
1526module_init(qfq_init);
1527module_exit(qfq_exit);
1528MODULE_LICENSE("GPL");
1529