1
2
3
4
5
6
7
8
9#include <linux/module.h>
10#include <linux/init.h>
11#include <linux/bitops.h>
12#include <linux/errno.h>
13#include <linux/netdevice.h>
14#include <linux/pkt_sched.h>
15#include <net/sch_generic.h>
16#include <net/pkt_sched.h>
17#include <net/pkt_cls.h>
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94#define QFQ_MAX_SLOTS 32
95
96
97
98
99
100
101
102
103
104
105#define QFQ_MAX_INDEX 24
106#define QFQ_MAX_WSHIFT 10
107
108#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
109#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
110
111#define FRAC_BITS 30
112#define ONE_FP (1UL << FRAC_BITS)
113
114#define QFQ_MTU_SHIFT 16
115#define QFQ_MIN_LMAX 512
116
117#define QFQ_MAX_AGG_CLASSES 8
118
119
120
121
122
123enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
124
125struct qfq_group;
126
127struct qfq_aggregate;
128
129struct qfq_class {
130 struct Qdisc_class_common common;
131
132 unsigned int filter_cnt;
133
134 struct gnet_stats_basic_packed bstats;
135 struct gnet_stats_queue qstats;
136 struct net_rate_estimator __rcu *rate_est;
137 struct Qdisc *qdisc;
138 struct list_head alist;
139 struct qfq_aggregate *agg;
140 int deficit;
141};
142
143struct qfq_aggregate {
144 struct hlist_node next;
145 u64 S, F;
146
147
148
149
150
151 struct qfq_group *grp;
152
153
154 u32 class_weight;
155
156 int lmax;
157
158 u32 inv_w;
159 u32 budgetmax;
160 u32 initial_budget, budget;
161
162 int num_classes;
163 struct list_head active;
164
165 struct hlist_node nonfull_next;
166};
167
168struct qfq_group {
169 u64 S, F;
170 unsigned int slot_shift;
171 unsigned int index;
172 unsigned int front;
173 unsigned long full_slots;
174
175
176 struct hlist_head slots[QFQ_MAX_SLOTS];
177};
178
179struct qfq_sched {
180 struct tcf_proto __rcu *filter_list;
181 struct tcf_block *block;
182 struct Qdisc_class_hash clhash;
183
184 u64 oldV, V;
185 struct qfq_aggregate *in_serv_agg;
186 u32 wsum;
187 u32 iwsum;
188
189 unsigned long bitmaps[QFQ_MAX_STATE];
190 struct qfq_group groups[QFQ_MAX_INDEX + 1];
191 u32 min_slot_shift;
192
193 u32 max_agg_classes;
194 struct hlist_head nonfull_aggs;
195};
196
197
198
199
200
201
202
203
204enum update_reason {enqueue, requeue};
205
206static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
207{
208 struct qfq_sched *q = qdisc_priv(sch);
209 struct Qdisc_class_common *clc;
210
211 clc = qdisc_class_find(&q->clhash, classid);
212 if (clc == NULL)
213 return NULL;
214 return container_of(clc, struct qfq_class, common);
215}
216
217static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
218 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
219 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
220};
221
222
223
224
225
226
227static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
228{
229 u64 slot_size = (u64)maxlen * inv_w;
230 unsigned long size_map;
231 int index = 0;
232
233 size_map = slot_size >> min_slot_shift;
234 if (!size_map)
235 goto out;
236
237 index = __fls(size_map) + 1;
238 index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
239
240 if (index < 0)
241 index = 0;
242out:
243 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
244 (unsigned long) ONE_FP/inv_w, maxlen, index);
245
246 return index;
247}
248
249static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
250static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
251 enum update_reason);
252
253static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
254 u32 lmax, u32 weight)
255{
256 INIT_LIST_HEAD(&agg->active);
257 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
258
259 agg->lmax = lmax;
260 agg->class_weight = weight;
261}
262
263static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
264 u32 lmax, u32 weight)
265{
266 struct qfq_aggregate *agg;
267
268 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
269 if (agg->lmax == lmax && agg->class_weight == weight)
270 return agg;
271
272 return NULL;
273}
274
275
276
277static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
278 int new_num_classes)
279{
280 u32 new_agg_weight;
281
282 if (new_num_classes == q->max_agg_classes)
283 hlist_del_init(&agg->nonfull_next);
284
285 if (agg->num_classes > new_num_classes &&
286 new_num_classes == q->max_agg_classes - 1)
287 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
288
289
290
291
292
293 agg->budgetmax = new_num_classes * agg->lmax;
294 new_agg_weight = agg->class_weight * new_num_classes;
295 agg->inv_w = ONE_FP/new_agg_weight;
296
297 if (agg->grp == NULL) {
298 int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
299 q->min_slot_shift);
300 agg->grp = &q->groups[i];
301 }
302
303 q->wsum +=
304 (int) agg->class_weight * (new_num_classes - agg->num_classes);
305 q->iwsum = ONE_FP / q->wsum;
306
307 agg->num_classes = new_num_classes;
308}
309
310
311static void qfq_add_to_agg(struct qfq_sched *q,
312 struct qfq_aggregate *agg,
313 struct qfq_class *cl)
314{
315 cl->agg = agg;
316
317 qfq_update_agg(q, agg, agg->num_classes+1);
318 if (cl->qdisc->q.qlen > 0) {
319 list_add_tail(&cl->alist, &agg->active);
320 if (list_first_entry(&agg->active, struct qfq_class, alist) ==
321 cl && q->in_serv_agg != agg)
322 qfq_activate_agg(q, agg, enqueue);
323 }
324}
325
326static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
327
328static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
329{
330 hlist_del_init(&agg->nonfull_next);
331 q->wsum -= agg->class_weight;
332 if (q->wsum != 0)
333 q->iwsum = ONE_FP / q->wsum;
334
335 if (q->in_serv_agg == agg)
336 q->in_serv_agg = qfq_choose_next_agg(q);
337 kfree(agg);
338}
339
340
341static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
342{
343 struct qfq_aggregate *agg = cl->agg;
344
345
346 list_del(&cl->alist);
347 if (list_empty(&agg->active))
348 qfq_deactivate_agg(q, agg);
349}
350
351
352static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
353{
354 struct qfq_aggregate *agg = cl->agg;
355
356 cl->agg = NULL;
357 if (agg->num_classes == 1) {
358 qfq_destroy_agg(q, agg);
359 return;
360 }
361 qfq_update_agg(q, agg, agg->num_classes-1);
362}
363
364
365static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
366{
367 if (cl->qdisc->q.qlen > 0)
368 qfq_deactivate_class(q, cl);
369
370 qfq_rm_from_agg(q, cl);
371}
372
373
374static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
375 u32 lmax)
376{
377 struct qfq_sched *q = qdisc_priv(sch);
378 struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
379
380 if (new_agg == NULL) {
381 new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
382 if (new_agg == NULL)
383 return -ENOBUFS;
384 qfq_init_agg(q, new_agg, lmax, weight);
385 }
386 qfq_deact_rm_from_agg(q, cl);
387 qfq_add_to_agg(q, new_agg, cl);
388
389 return 0;
390}
391
392static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
393 struct nlattr **tca, unsigned long *arg,
394 struct netlink_ext_ack *extack)
395{
396 struct qfq_sched *q = qdisc_priv(sch);
397 struct qfq_class *cl = (struct qfq_class *)*arg;
398 bool existing = false;
399 struct nlattr *tb[TCA_QFQ_MAX + 1];
400 struct qfq_aggregate *new_agg = NULL;
401 u32 weight, lmax, inv_w;
402 int err;
403 int delta_w;
404
405 if (tca[TCA_OPTIONS] == NULL) {
406 pr_notice("qfq: no options\n");
407 return -EINVAL;
408 }
409
410 err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS],
411 qfq_policy, NULL);
412 if (err < 0)
413 return err;
414
415 if (tb[TCA_QFQ_WEIGHT]) {
416 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
417 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
418 pr_notice("qfq: invalid weight %u\n", weight);
419 return -EINVAL;
420 }
421 } else
422 weight = 1;
423
424 if (tb[TCA_QFQ_LMAX]) {
425 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
426 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
427 pr_notice("qfq: invalid max length %u\n", lmax);
428 return -EINVAL;
429 }
430 } else
431 lmax = psched_mtu(qdisc_dev(sch));
432
433 inv_w = ONE_FP / weight;
434 weight = ONE_FP / inv_w;
435
436 if (cl != NULL &&
437 lmax == cl->agg->lmax &&
438 weight == cl->agg->class_weight)
439 return 0;
440
441 delta_w = weight - (cl ? cl->agg->class_weight : 0);
442
443 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
444 pr_notice("qfq: total weight out of range (%d + %u)\n",
445 delta_w, q->wsum);
446 return -EINVAL;
447 }
448
449 if (cl != NULL) {
450 if (tca[TCA_RATE]) {
451 err = gen_replace_estimator(&cl->bstats, NULL,
452 &cl->rate_est,
453 NULL,
454 qdisc_root_sleeping_running(sch),
455 tca[TCA_RATE]);
456 if (err)
457 return err;
458 }
459 existing = true;
460 goto set_change_agg;
461 }
462
463
464 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
465 if (cl == NULL)
466 return -ENOBUFS;
467
468 cl->common.classid = classid;
469 cl->deficit = lmax;
470
471 cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
472 classid, NULL);
473 if (cl->qdisc == NULL)
474 cl->qdisc = &noop_qdisc;
475
476 if (tca[TCA_RATE]) {
477 err = gen_new_estimator(&cl->bstats, NULL,
478 &cl->rate_est,
479 NULL,
480 qdisc_root_sleeping_running(sch),
481 tca[TCA_RATE]);
482 if (err)
483 goto destroy_class;
484 }
485
486 if (cl->qdisc != &noop_qdisc)
487 qdisc_hash_add(cl->qdisc, true);
488 sch_tree_lock(sch);
489 qdisc_class_hash_insert(&q->clhash, &cl->common);
490 sch_tree_unlock(sch);
491
492 qdisc_class_hash_grow(sch, &q->clhash);
493
494set_change_agg:
495 sch_tree_lock(sch);
496 new_agg = qfq_find_agg(q, lmax, weight);
497 if (new_agg == NULL) {
498 sch_tree_unlock(sch);
499 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
500 if (new_agg == NULL) {
501 err = -ENOBUFS;
502 gen_kill_estimator(&cl->rate_est);
503 goto destroy_class;
504 }
505 sch_tree_lock(sch);
506 qfq_init_agg(q, new_agg, lmax, weight);
507 }
508 if (existing)
509 qfq_deact_rm_from_agg(q, cl);
510 qfq_add_to_agg(q, new_agg, cl);
511 sch_tree_unlock(sch);
512
513 *arg = (unsigned long)cl;
514 return 0;
515
516destroy_class:
517 qdisc_put(cl->qdisc);
518 kfree(cl);
519 return err;
520}
521
522static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
523{
524 struct qfq_sched *q = qdisc_priv(sch);
525
526 qfq_rm_from_agg(q, cl);
527 gen_kill_estimator(&cl->rate_est);
528 qdisc_put(cl->qdisc);
529 kfree(cl);
530}
531
532static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
533{
534 struct qfq_sched *q = qdisc_priv(sch);
535 struct qfq_class *cl = (struct qfq_class *)arg;
536
537 if (cl->filter_cnt > 0)
538 return -EBUSY;
539
540 sch_tree_lock(sch);
541
542 qdisc_purge_queue(cl->qdisc);
543 qdisc_class_hash_remove(&q->clhash, &cl->common);
544
545 sch_tree_unlock(sch);
546
547 qfq_destroy_class(sch, cl);
548 return 0;
549}
550
551static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
552{
553 return (unsigned long)qfq_find_class(sch, classid);
554}
555
556static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
557 struct netlink_ext_ack *extack)
558{
559 struct qfq_sched *q = qdisc_priv(sch);
560
561 if (cl)
562 return NULL;
563
564 return q->block;
565}
566
567static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
568 u32 classid)
569{
570 struct qfq_class *cl = qfq_find_class(sch, classid);
571
572 if (cl != NULL)
573 cl->filter_cnt++;
574
575 return (unsigned long)cl;
576}
577
578static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
579{
580 struct qfq_class *cl = (struct qfq_class *)arg;
581
582 cl->filter_cnt--;
583}
584
585static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
586 struct Qdisc *new, struct Qdisc **old,
587 struct netlink_ext_ack *extack)
588{
589 struct qfq_class *cl = (struct qfq_class *)arg;
590
591 if (new == NULL) {
592 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
593 cl->common.classid, NULL);
594 if (new == NULL)
595 new = &noop_qdisc;
596 }
597
598 *old = qdisc_replace(sch, new, &cl->qdisc);
599 return 0;
600}
601
602static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
603{
604 struct qfq_class *cl = (struct qfq_class *)arg;
605
606 return cl->qdisc;
607}
608
609static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
610 struct sk_buff *skb, struct tcmsg *tcm)
611{
612 struct qfq_class *cl = (struct qfq_class *)arg;
613 struct nlattr *nest;
614
615 tcm->tcm_parent = TC_H_ROOT;
616 tcm->tcm_handle = cl->common.classid;
617 tcm->tcm_info = cl->qdisc->handle;
618
619 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
620 if (nest == NULL)
621 goto nla_put_failure;
622 if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
623 nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
624 goto nla_put_failure;
625 return nla_nest_end(skb, nest);
626
627nla_put_failure:
628 nla_nest_cancel(skb, nest);
629 return -EMSGSIZE;
630}
631
632static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
633 struct gnet_dump *d)
634{
635 struct qfq_class *cl = (struct qfq_class *)arg;
636 struct tc_qfq_stats xstats;
637
638 memset(&xstats, 0, sizeof(xstats));
639
640 xstats.weight = cl->agg->class_weight;
641 xstats.lmax = cl->agg->lmax;
642
643 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
644 d, NULL, &cl->bstats) < 0 ||
645 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
646 qdisc_qstats_copy(d, cl->qdisc) < 0)
647 return -1;
648
649 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
650}
651
652static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
653{
654 struct qfq_sched *q = qdisc_priv(sch);
655 struct qfq_class *cl;
656 unsigned int i;
657
658 if (arg->stop)
659 return;
660
661 for (i = 0; i < q->clhash.hashsize; i++) {
662 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
663 if (arg->count < arg->skip) {
664 arg->count++;
665 continue;
666 }
667 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
668 arg->stop = 1;
669 return;
670 }
671 arg->count++;
672 }
673 }
674}
675
676static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
677 int *qerr)
678{
679 struct qfq_sched *q = qdisc_priv(sch);
680 struct qfq_class *cl;
681 struct tcf_result res;
682 struct tcf_proto *fl;
683 int result;
684
685 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
686 pr_debug("qfq_classify: found %d\n", skb->priority);
687 cl = qfq_find_class(sch, skb->priority);
688 if (cl != NULL)
689 return cl;
690 }
691
692 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
693 fl = rcu_dereference_bh(q->filter_list);
694 result = tcf_classify(skb, fl, &res, false);
695 if (result >= 0) {
696#ifdef CONFIG_NET_CLS_ACT
697 switch (result) {
698 case TC_ACT_QUEUED:
699 case TC_ACT_STOLEN:
700 case TC_ACT_TRAP:
701 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
702
703 case TC_ACT_SHOT:
704 return NULL;
705 }
706#endif
707 cl = (struct qfq_class *)res.class;
708 if (cl == NULL)
709 cl = qfq_find_class(sch, res.classid);
710 return cl;
711 }
712
713 return NULL;
714}
715
716
717static inline int qfq_gt(u64 a, u64 b)
718{
719 return (s64)(a - b) > 0;
720}
721
722
723static inline u64 qfq_round_down(u64 ts, unsigned int shift)
724{
725 return ts & ~((1ULL << shift) - 1);
726}
727
728
729static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
730 unsigned long bitmap)
731{
732 int index = __ffs(bitmap);
733 return &q->groups[index];
734}
735
736static inline unsigned long mask_from(unsigned long bitmap, int from)
737{
738 return bitmap & ~((1UL << from) - 1);
739}
740
741
742
743
744
745
746static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
747{
748
749 unsigned int state = qfq_gt(grp->S, q->V);
750 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
751 struct qfq_group *next;
752
753 if (mask) {
754 next = qfq_ffs(q, mask);
755 if (qfq_gt(grp->F, next->F))
756 state |= EB;
757 }
758
759 return state;
760}
761
762
763
764
765
766
767
768
769static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
770 int src, int dst)
771{
772 q->bitmaps[dst] |= q->bitmaps[src] & mask;
773 q->bitmaps[src] &= ~mask;
774}
775
776static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
777{
778 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
779 struct qfq_group *next;
780
781 if (mask) {
782 next = qfq_ffs(q, mask);
783 if (!qfq_gt(next->F, old_F))
784 return;
785 }
786
787 mask = (1UL << index) - 1;
788 qfq_move_groups(q, mask, EB, ER);
789 qfq_move_groups(q, mask, IB, IR);
790}
791
792
793
794
795
796
797
798
799
800
801
802static void qfq_make_eligible(struct qfq_sched *q)
803{
804 unsigned long vslot = q->V >> q->min_slot_shift;
805 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
806
807 if (vslot != old_vslot) {
808 unsigned long mask;
809 int last_flip_pos = fls(vslot ^ old_vslot);
810
811 if (last_flip_pos > 31)
812 mask = ~0UL;
813 else
814 mask = (1UL << last_flip_pos) - 1;
815
816 qfq_move_groups(q, mask, IR, ER);
817 qfq_move_groups(q, mask, IB, EB);
818 }
819}
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
877 u64 roundedS)
878{
879 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
880 unsigned int i;
881
882 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
883 u64 deltaS = roundedS - grp->S -
884 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
885 agg->S -= deltaS;
886 agg->F -= deltaS;
887 slot = QFQ_MAX_SLOTS - 2;
888 }
889
890 i = (grp->front + slot) % QFQ_MAX_SLOTS;
891
892 hlist_add_head(&agg->next, &grp->slots[i]);
893 __set_bit(slot, &grp->full_slots);
894}
895
896
897static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
898{
899 return hlist_entry(grp->slots[grp->front].first,
900 struct qfq_aggregate, next);
901}
902
903
904
905
906static void qfq_front_slot_remove(struct qfq_group *grp)
907{
908 struct qfq_aggregate *agg = qfq_slot_head(grp);
909
910 BUG_ON(!agg);
911 hlist_del(&agg->next);
912 if (hlist_empty(&grp->slots[grp->front]))
913 __clear_bit(0, &grp->full_slots);
914}
915
916
917
918
919
920
921static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
922{
923 unsigned int i;
924
925 pr_debug("qfq slot_scan: grp %u full %#lx\n",
926 grp->index, grp->full_slots);
927
928 if (grp->full_slots == 0)
929 return NULL;
930
931 i = __ffs(grp->full_slots);
932 if (i > 0) {
933 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
934 grp->full_slots >>= i;
935 }
936
937 return qfq_slot_head(grp);
938}
939
940
941
942
943
944
945
946
947
948
949static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
950{
951 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
952
953 grp->full_slots <<= i;
954 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
955}
956
957static void qfq_update_eligible(struct qfq_sched *q)
958{
959 struct qfq_group *grp;
960 unsigned long ineligible;
961
962 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
963 if (ineligible) {
964 if (!q->bitmaps[ER]) {
965 grp = qfq_ffs(q, ineligible);
966 if (qfq_gt(grp->S, q->V))
967 q->V = grp->S;
968 }
969 qfq_make_eligible(q);
970 }
971}
972
973
974static void agg_dequeue(struct qfq_aggregate *agg,
975 struct qfq_class *cl, unsigned int len)
976{
977 qdisc_dequeue_peeked(cl->qdisc);
978
979 cl->deficit -= (int) len;
980
981 if (cl->qdisc->q.qlen == 0)
982 list_del(&cl->alist);
983 else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
984 cl->deficit += agg->lmax;
985 list_move_tail(&cl->alist, &agg->active);
986 }
987}
988
989static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
990 struct qfq_class **cl,
991 unsigned int *len)
992{
993 struct sk_buff *skb;
994
995 *cl = list_first_entry(&agg->active, struct qfq_class, alist);
996 skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
997 if (skb == NULL)
998 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
999 else
1000 *len = qdisc_pkt_len(skb);
1001
1002 return skb;
1003}
1004
1005
1006static inline void charge_actual_service(struct qfq_aggregate *agg)
1007{
1008
1009
1010
1011
1012
1013 u32 service_received = min(agg->budgetmax,
1014 agg->initial_budget - agg->budget);
1015
1016 agg->F = agg->S + (u64)service_received * agg->inv_w;
1017}
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1032{
1033 unsigned long mask;
1034 u64 limit, roundedF;
1035 int slot_shift = agg->grp->slot_shift;
1036
1037 roundedF = qfq_round_down(agg->F, slot_shift);
1038 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1039
1040 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1041
1042 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1043 if (mask) {
1044 struct qfq_group *next = qfq_ffs(q, mask);
1045 if (qfq_gt(roundedF, next->F)) {
1046 if (qfq_gt(limit, next->F))
1047 agg->S = next->F;
1048 else
1049 agg->S = limit;
1050 return;
1051 }
1052 }
1053 agg->S = q->V;
1054 } else
1055 agg->S = agg->F;
1056}
1057
1058
1059
1060
1061
1062
1063static inline void
1064qfq_update_agg_ts(struct qfq_sched *q,
1065 struct qfq_aggregate *agg, enum update_reason reason)
1066{
1067 if (reason != requeue)
1068 qfq_update_start(q, agg);
1069 else
1070 agg->S = agg->F;
1071
1072 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1073}
1074
1075static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1076
1077static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1078{
1079 struct qfq_sched *q = qdisc_priv(sch);
1080 struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
1081 struct qfq_class *cl;
1082 struct sk_buff *skb = NULL;
1083
1084 unsigned int len = 0;
1085
1086 if (in_serv_agg == NULL)
1087 return NULL;
1088
1089 if (!list_empty(&in_serv_agg->active))
1090 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1091
1092
1093
1094
1095
1096
1097 if (len == 0 || in_serv_agg->budget < len) {
1098 charge_actual_service(in_serv_agg);
1099
1100
1101 in_serv_agg->initial_budget = in_serv_agg->budget =
1102 in_serv_agg->budgetmax;
1103
1104 if (!list_empty(&in_serv_agg->active)) {
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 qfq_update_agg_ts(q, in_serv_agg, requeue);
1116 qfq_schedule_agg(q, in_serv_agg);
1117 } else if (sch->q.qlen == 0) {
1118 q->in_serv_agg = NULL;
1119 return NULL;
1120 }
1121
1122
1123
1124
1125
1126 in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
1127 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1128 }
1129 if (!skb)
1130 return NULL;
1131
1132 qdisc_qstats_backlog_dec(sch, skb);
1133 sch->q.qlen--;
1134 qdisc_bstats_update(sch, skb);
1135
1136 agg_dequeue(in_serv_agg, cl, len);
1137
1138
1139
1140
1141 if (unlikely(in_serv_agg->budget < len))
1142 in_serv_agg->budget = 0;
1143 else
1144 in_serv_agg->budget -= len;
1145
1146 q->V += (u64)len * q->iwsum;
1147 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1148 len, (unsigned long long) in_serv_agg->F,
1149 (unsigned long long) q->V);
1150
1151 return skb;
1152}
1153
1154static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1155{
1156 struct qfq_group *grp;
1157 struct qfq_aggregate *agg, *new_front_agg;
1158 u64 old_F;
1159
1160 qfq_update_eligible(q);
1161 q->oldV = q->V;
1162
1163 if (!q->bitmaps[ER])
1164 return NULL;
1165
1166 grp = qfq_ffs(q, q->bitmaps[ER]);
1167 old_F = grp->F;
1168
1169 agg = qfq_slot_head(grp);
1170
1171
1172 qfq_front_slot_remove(grp);
1173
1174 new_front_agg = qfq_slot_scan(grp);
1175
1176 if (new_front_agg == NULL)
1177 __clear_bit(grp->index, &q->bitmaps[ER]);
1178 else {
1179 u64 roundedS = qfq_round_down(new_front_agg->S,
1180 grp->slot_shift);
1181 unsigned int s;
1182
1183 if (grp->S == roundedS)
1184 return agg;
1185 grp->S = roundedS;
1186 grp->F = roundedS + (2ULL << grp->slot_shift);
1187 __clear_bit(grp->index, &q->bitmaps[ER]);
1188 s = qfq_calc_state(q, grp);
1189 __set_bit(grp->index, &q->bitmaps[s]);
1190 }
1191
1192 qfq_unblock_groups(q, grp->index, old_F);
1193
1194 return agg;
1195}
1196
1197static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1198 struct sk_buff **to_free)
1199{
1200 unsigned int len = qdisc_pkt_len(skb), gso_segs;
1201 struct qfq_sched *q = qdisc_priv(sch);
1202 struct qfq_class *cl;
1203 struct qfq_aggregate *agg;
1204 int err = 0;
1205 bool first;
1206
1207 cl = qfq_classify(skb, sch, &err);
1208 if (cl == NULL) {
1209 if (err & __NET_XMIT_BYPASS)
1210 qdisc_qstats_drop(sch);
1211 __qdisc_drop(skb, to_free);
1212 return err;
1213 }
1214 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
1215
1216 if (unlikely(cl->agg->lmax < len)) {
1217 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
1218 cl->agg->lmax, len, cl->common.classid);
1219 err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
1220 if (err) {
1221 cl->qstats.drops++;
1222 return qdisc_drop(skb, sch, to_free);
1223 }
1224 }
1225
1226 gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
1227 first = !cl->qdisc->q.qlen;
1228 err = qdisc_enqueue(skb, cl->qdisc, to_free);
1229 if (unlikely(err != NET_XMIT_SUCCESS)) {
1230 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
1231 if (net_xmit_drop_count(err)) {
1232 cl->qstats.drops++;
1233 qdisc_qstats_drop(sch);
1234 }
1235 return err;
1236 }
1237
1238 cl->bstats.bytes += len;
1239 cl->bstats.packets += gso_segs;
1240 sch->qstats.backlog += len;
1241 ++sch->q.qlen;
1242
1243 agg = cl->agg;
1244
1245 if (!first) {
1246 if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
1247 list_first_entry(&agg->active, struct qfq_class, alist)
1248 == cl && cl->deficit < len)
1249 list_move_tail(&cl->alist, &agg->active);
1250
1251 return err;
1252 }
1253
1254
1255 cl->deficit = agg->lmax;
1256 list_add_tail(&cl->alist, &agg->active);
1257
1258 if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
1259 q->in_serv_agg == agg)
1260 return err;
1261
1262 qfq_activate_agg(q, agg, enqueue);
1263
1264 return err;
1265}
1266
1267
1268
1269
1270static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1271{
1272 struct qfq_group *grp = agg->grp;
1273 u64 roundedS;
1274 int s;
1275
1276 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287 if (grp->full_slots) {
1288 if (!qfq_gt(grp->S, agg->S))
1289 goto skip_update;
1290
1291
1292 qfq_slot_rotate(grp, roundedS);
1293
1294 __clear_bit(grp->index, &q->bitmaps[IR]);
1295 __clear_bit(grp->index, &q->bitmaps[IB]);
1296 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
1297 q->in_serv_agg == NULL)
1298 q->V = roundedS;
1299
1300 grp->S = roundedS;
1301 grp->F = roundedS + (2ULL << grp->slot_shift);
1302 s = qfq_calc_state(q, grp);
1303 __set_bit(grp->index, &q->bitmaps[s]);
1304
1305 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
1306 s, q->bitmaps[s],
1307 (unsigned long long) agg->S,
1308 (unsigned long long) agg->F,
1309 (unsigned long long) q->V);
1310
1311skip_update:
1312 qfq_slot_insert(grp, agg, roundedS);
1313}
1314
1315
1316
1317static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
1318 enum update_reason reason)
1319{
1320 agg->initial_budget = agg->budget = agg->budgetmax;
1321
1322 qfq_update_agg_ts(q, agg, reason);
1323 if (q->in_serv_agg == NULL) {
1324 q->in_serv_agg = agg;
1325
1326 q->oldV = q->V = agg->S;
1327 } else if (agg != q->in_serv_agg)
1328 qfq_schedule_agg(q, agg);
1329}
1330
1331static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
1332 struct qfq_aggregate *agg)
1333{
1334 unsigned int i, offset;
1335 u64 roundedS;
1336
1337 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1338 offset = (roundedS - grp->S) >> grp->slot_shift;
1339
1340 i = (grp->front + offset) % QFQ_MAX_SLOTS;
1341
1342 hlist_del(&agg->next);
1343 if (hlist_empty(&grp->slots[i]))
1344 __clear_bit(offset, &grp->full_slots);
1345}
1346
1347
1348
1349
1350
1351
1352
1353
1354static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1355{
1356 struct qfq_group *grp = agg->grp;
1357 unsigned long mask;
1358 u64 roundedS;
1359 int s;
1360
1361 if (agg == q->in_serv_agg) {
1362 charge_actual_service(agg);
1363 q->in_serv_agg = qfq_choose_next_agg(q);
1364 return;
1365 }
1366
1367 agg->F = agg->S;
1368 qfq_slot_remove(q, grp, agg);
1369
1370 if (!grp->full_slots) {
1371 __clear_bit(grp->index, &q->bitmaps[IR]);
1372 __clear_bit(grp->index, &q->bitmaps[EB]);
1373 __clear_bit(grp->index, &q->bitmaps[IB]);
1374
1375 if (test_bit(grp->index, &q->bitmaps[ER]) &&
1376 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
1377 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
1378 if (mask)
1379 mask = ~((1UL << __fls(mask)) - 1);
1380 else
1381 mask = ~0UL;
1382 qfq_move_groups(q, mask, EB, ER);
1383 qfq_move_groups(q, mask, IB, IR);
1384 }
1385 __clear_bit(grp->index, &q->bitmaps[ER]);
1386 } else if (hlist_empty(&grp->slots[grp->front])) {
1387 agg = qfq_slot_scan(grp);
1388 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1389 if (grp->S != roundedS) {
1390 __clear_bit(grp->index, &q->bitmaps[ER]);
1391 __clear_bit(grp->index, &q->bitmaps[IR]);
1392 __clear_bit(grp->index, &q->bitmaps[EB]);
1393 __clear_bit(grp->index, &q->bitmaps[IB]);
1394 grp->S = roundedS;
1395 grp->F = roundedS + (2ULL << grp->slot_shift);
1396 s = qfq_calc_state(q, grp);
1397 __set_bit(grp->index, &q->bitmaps[s]);
1398 }
1399 }
1400}
1401
1402static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1403{
1404 struct qfq_sched *q = qdisc_priv(sch);
1405 struct qfq_class *cl = (struct qfq_class *)arg;
1406
1407 qfq_deactivate_class(q, cl);
1408}
1409
1410static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
1411 struct netlink_ext_ack *extack)
1412{
1413 struct qfq_sched *q = qdisc_priv(sch);
1414 struct qfq_group *grp;
1415 int i, j, err;
1416 u32 max_cl_shift, maxbudg_shift, max_classes;
1417
1418 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1419 if (err)
1420 return err;
1421
1422 err = qdisc_class_hash_init(&q->clhash);
1423 if (err < 0)
1424 return err;
1425
1426 if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
1427 max_classes = QFQ_MAX_AGG_CLASSES;
1428 else
1429 max_classes = qdisc_dev(sch)->tx_queue_len + 1;
1430
1431 max_cl_shift = __fls(max_classes);
1432 q->max_agg_classes = 1<<max_cl_shift;
1433
1434
1435 maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
1436 q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
1437
1438 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1439 grp = &q->groups[i];
1440 grp->index = i;
1441 grp->slot_shift = q->min_slot_shift + i;
1442 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1443 INIT_HLIST_HEAD(&grp->slots[j]);
1444 }
1445
1446 INIT_HLIST_HEAD(&q->nonfull_aggs);
1447
1448 return 0;
1449}
1450
1451static void qfq_reset_qdisc(struct Qdisc *sch)
1452{
1453 struct qfq_sched *q = qdisc_priv(sch);
1454 struct qfq_class *cl;
1455 unsigned int i;
1456
1457 for (i = 0; i < q->clhash.hashsize; i++) {
1458 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1459 if (cl->qdisc->q.qlen > 0)
1460 qfq_deactivate_class(q, cl);
1461
1462 qdisc_reset(cl->qdisc);
1463 }
1464 }
1465 sch->qstats.backlog = 0;
1466 sch->q.qlen = 0;
1467}
1468
1469static void qfq_destroy_qdisc(struct Qdisc *sch)
1470{
1471 struct qfq_sched *q = qdisc_priv(sch);
1472 struct qfq_class *cl;
1473 struct hlist_node *next;
1474 unsigned int i;
1475
1476 tcf_block_put(q->block);
1477
1478 for (i = 0; i < q->clhash.hashsize; i++) {
1479 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1480 common.hnode) {
1481 qfq_destroy_class(sch, cl);
1482 }
1483 }
1484 qdisc_class_hash_destroy(&q->clhash);
1485}
1486
1487static const struct Qdisc_class_ops qfq_class_ops = {
1488 .change = qfq_change_class,
1489 .delete = qfq_delete_class,
1490 .find = qfq_search_class,
1491 .tcf_block = qfq_tcf_block,
1492 .bind_tcf = qfq_bind_tcf,
1493 .unbind_tcf = qfq_unbind_tcf,
1494 .graft = qfq_graft_class,
1495 .leaf = qfq_class_leaf,
1496 .qlen_notify = qfq_qlen_notify,
1497 .dump = qfq_dump_class,
1498 .dump_stats = qfq_dump_class_stats,
1499 .walk = qfq_walk,
1500};
1501
1502static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1503 .cl_ops = &qfq_class_ops,
1504 .id = "qfq",
1505 .priv_size = sizeof(struct qfq_sched),
1506 .enqueue = qfq_enqueue,
1507 .dequeue = qfq_dequeue,
1508 .peek = qdisc_peek_dequeued,
1509 .init = qfq_init_qdisc,
1510 .reset = qfq_reset_qdisc,
1511 .destroy = qfq_destroy_qdisc,
1512 .owner = THIS_MODULE,
1513};
1514
1515static int __init qfq_init(void)
1516{
1517 return register_qdisc(&qfq_qdisc_ops);
1518}
1519
1520static void __exit qfq_exit(void)
1521{
1522 unregister_qdisc(&qfq_qdisc_ops);
1523}
1524
1525module_init(qfq_init);
1526module_exit(qfq_exit);
1527MODULE_LICENSE("GPL");
1528