1
2
3
4
5
6
7
8
9
10
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/bitops.h>
15#include <linux/errno.h>
16#include <linux/netdevice.h>
17#include <linux/pkt_sched.h>
18#include <net/sch_generic.h>
19#include <net/pkt_sched.h>
20#include <net/pkt_cls.h>
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* Max slots in a group's bucket list; one bit per slot in full_slots. */
#define QFQ_MAX_SLOTS 32

/*
 * Timestamps (S, F and the system virtual time V) are computed in
 * fixed-point arithmetic with FRAC_BITS fractional bits; ONE_FP is 1.0
 * in that representation.
 *
 * QFQ_MAX_INDEX is the highest group index (see qfq_sched.groups).
 * QFQ_MAX_WSHIFT bounds a single class weight to 2^QFQ_MAX_WSHIFT and
 * QFQ_MAX_WSUM bounds the total weight of all classes.
 */
#define QFQ_MAX_INDEX 24
#define QFQ_MAX_WSHIFT 10

#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)

#define FRAC_BITS 30 /* fixed point arithmetic */
#define ONE_FP (1UL << FRAC_BITS)

/* Bounds for a class's maximum packet length (lmax). */
#define QFQ_MTU_SHIFT 16
#define QFQ_MIN_LMAX 512

/* Maximum number of classes grouped into one aggregate. */
#define QFQ_MAX_AGG_CLASSES 8
121
122
123
124
125
126enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
127
128struct qfq_group;
129
130struct qfq_aggregate;
131
/* A QFQ traffic class; always a member of exactly one aggregate. */
struct qfq_class {
	struct Qdisc_class_common common;

	unsigned int filter_cnt;	/* tc filters referencing this class */

	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct net_rate_estimator __rcu *rate_est;
	struct Qdisc *qdisc;		/* leaf qdisc holding the packets */
	struct list_head alist;		/* link in agg->active (round robin) */
	struct qfq_aggregate *agg;	/* parent aggregate */
	int deficit;			/* deficit counter, quantum = agg->lmax */
};
145
/* An aggregate of classes sharing the same lmax and class weight; this
 * is the unit actually scheduled by the QFQ+ algorithm.
 */
struct qfq_aggregate {
	struct hlist_node next;	/* link in the group's slot list */
	u64 S, F;		/* start/finish virtual timestamps */

	/* Group this aggregate belongs to; assigned once in
	 * qfq_update_agg() via qfq_calc_index() and then only the
	 * group pointer is used.
	 */
	struct qfq_group *grp;

	/* Weight of each class belonging to this aggregate. */
	u32 class_weight;
	/* Max packet size for the classes, also the DRR quantum. */
	int lmax;

	u32 inv_w;		/* ONE_FP / aggregate weight */
	u32 budgetmax;		/* max budget: num_classes * lmax */
	u32 initial_budget, budget;	/* budget at service start / current */

	int num_classes;	/* number of member classes */
	struct list_head active; /* round-robin list of backlogged classes */

	struct hlist_node nonfull_next;	/* link in qfq_sched.nonfull_aggs */
};
170
/* A group of aggregates sharing (roughly) the same slot size. */
struct qfq_group {
	u64 S, F;			/* group timestamps (approximate) */
	unsigned int slot_shift;	/* log2 of the slot size */
	unsigned int index;		/* group index (bit in the bitmaps) */
	unsigned int front;		/* index of the front slot */
	unsigned long full_slots;	/* bitmap of non-empty slots */

	/* Ring of lists of queued aggregates, one list per slot. */
	struct hlist_head slots[QFQ_MAX_SLOTS];
};
181
/* Per-qdisc QFQ+ scheduler state. */
struct qfq_sched {
	struct tcf_proto __rcu *filter_list;
	struct tcf_block *block;
	struct Qdisc_class_hash clhash;

	u64 oldV, V;			/* previous and current virtual time */
	struct qfq_aggregate *in_serv_agg; /* aggregate being served */
	u32 wsum;			/* sum of all class weights */
	u32 iwsum;			/* ONE_FP / wsum */

	unsigned long bitmaps[QFQ_MAX_STATE];	/* group state bitmaps */
	struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* the groups */
	u32 min_slot_shift;	/* slot shift of group 0 */

	u32 max_agg_classes;		/* max classes per aggregate */
	struct hlist_head nonfull_aggs; /* aggs with room for more classes */
};
199
200
201
202
203
204
205
206
207enum update_reason {enqueue, requeue};
208
209static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
210{
211 struct qfq_sched *q = qdisc_priv(sch);
212 struct Qdisc_class_common *clc;
213
214 clc = qdisc_class_find(&q->clhash, classid);
215 if (clc == NULL)
216 return NULL;
217 return container_of(clc, struct qfq_class, common);
218}
219
/* Netlink attribute policy for QFQ class parameters. */
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
	[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
	[TCA_QFQ_LMAX] = { .type = NLA_U32 },
};
224
225
226
227
228
229
/*
 * Compute the index of the group an aggregate with the given inverse
 * weight and maximum length belongs to: essentially
 * log2(maxlen * inv_w) relative to min_slot_shift.  Returns 0 when the
 * slot size fits entirely below min_slot_shift.
 */
static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
{
	u64 slot_size = (u64)maxlen * inv_w;
	unsigned long size_map;
	int index = 0;

	size_map = slot_size >> min_slot_shift;
	if (!size_map)
		goto out;

	index = __fls(size_map) + 1;	/* basically a log_2 */
	/* if slot_size is an exact power of two, use the lower group */
	index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));

	if (index < 0)
		index = 0;
out:
	pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
		 (unsigned long) ONE_FP/inv_w, maxlen, index);

	return index;
}
251
252static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
253static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
254 enum update_reason);
255
256static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
257 u32 lmax, u32 weight)
258{
259 INIT_LIST_HEAD(&agg->active);
260 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
261
262 agg->lmax = lmax;
263 agg->class_weight = weight;
264}
265
266static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
267 u32 lmax, u32 weight)
268{
269 struct qfq_aggregate *agg;
270
271 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
272 if (agg->lmax == lmax && agg->class_weight == weight)
273 return agg;
274
275 return NULL;
276}
277
278
279
/* Update aggregate state as a function of the new number of classes it
 * holds: maintain the non-full list, recompute the maximum budget and
 * inverse weight, assign a group on first use, and adjust the
 * scheduler's total weight sum.
 */
static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
			   int new_num_classes)
{
	u32 new_agg_weight;

	if (new_num_classes == q->max_agg_classes)
		hlist_del_init(&agg->nonfull_next); /* aggregate is now full */

	if (agg->num_classes > new_num_classes &&
	    new_num_classes == q->max_agg_classes - 1) /* no longer full */
		hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);

	/* The next assignment may let agg->initial_budget exceed
	 * agg->budgetmax; charge_actual_service() copes with that by
	 * capping the service received at budgetmax.
	 */
	agg->budgetmax = new_num_classes * agg->lmax;
	new_agg_weight = agg->class_weight * new_num_classes;
	agg->inv_w = ONE_FP/new_agg_weight;

	if (agg->grp == NULL) {
		int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
				       q->min_slot_shift);
		agg->grp = &q->groups[i];
	}

	q->wsum +=
		(int) agg->class_weight * (new_num_classes - agg->num_classes);
	q->iwsum = ONE_FP / q->wsum;

	agg->num_classes = new_num_classes;
}
312
313
/* Add class to aggregate; if the class is backlogged, also put it on
 * the aggregate's active list and possibly schedule the aggregate.
 */
static void qfq_add_to_agg(struct qfq_sched *q,
			   struct qfq_aggregate *agg,
			   struct qfq_class *cl)
{
	cl->agg = agg;

	qfq_update_agg(q, agg, agg->num_classes+1);
	if (cl->qdisc->q.qlen > 0) { /* class is active */
		list_add_tail(&cl->alist, &agg->active);
		/* schedule the aggregate only if the class just became
		 * the head of the active list and the aggregate is not
		 * already in service
		 */
		if (list_first_entry(&agg->active, struct qfq_class, alist) ==
		    cl && q->in_serv_agg != agg)
			qfq_activate_agg(q, agg, enqueue);
	}
}
328
329static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
330
/* Free an aggregate: unlink it from the non-full list, update the
 * total weight sum and, if it was the one in service, pick the next
 * aggregate to serve.
 */
static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	hlist_del_init(&agg->nonfull_next);
	q->wsum -= agg->class_weight;
	if (q->wsum != 0)	/* avoid division by zero in iwsum */
		q->iwsum = ONE_FP / q->wsum;

	if (q->in_serv_agg == agg)
		q->in_serv_agg = qfq_choose_next_agg(q);
	kfree(agg);
}
342
343
/* Deschedule class from within its parent aggregate; if that was the
 * last active class, deactivate the whole aggregate.
 */
static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
{
	struct qfq_aggregate *agg = cl->agg;


	list_del(&cl->alist); /* remove from RR queue of the aggregate */
	if (list_empty(&agg->active)) /* agg is now inactive */
		qfq_deactivate_agg(q, agg);
}
353
354
355static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
356{
357 struct qfq_aggregate *agg = cl->agg;
358
359 cl->agg = NULL;
360 if (agg->num_classes == 1) {
361 qfq_destroy_agg(q, agg);
362 return;
363 }
364 qfq_update_agg(q, agg, agg->num_classes-1);
365}
366
367
/* Deactivate the class if it is still backlogged, then remove it from
 * its aggregate.  The order matters: deactivation must happen while
 * the class is still linked to the aggregate.
 */
static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
{
	if (cl->qdisc->q.qlen > 0) /* class is active */
		qfq_deactivate_class(q, cl);

	qfq_rm_from_agg(q, cl);
}
375
376
/* Move class @cl to an aggregate with the given weight and lmax,
 * reusing a matching non-full aggregate when one exists, otherwise
 * allocating a new one (GFP_ATOMIC: called from the enqueue path).
 * Returns 0 on success, -ENOBUFS on allocation failure.
 */
static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
			   u32 lmax)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);

	if (new_agg == NULL) { /* create new aggregate */
		new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
		if (new_agg == NULL)
			return -ENOBUFS;
		qfq_init_agg(q, new_agg, lmax, weight);
	}
	qfq_deact_rm_from_agg(q, cl);
	qfq_add_to_agg(q, new_agg, cl);

	return 0;
}
394
395static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
396 struct nlattr **tca, unsigned long *arg,
397 struct netlink_ext_ack *extack)
398{
399 struct qfq_sched *q = qdisc_priv(sch);
400 struct qfq_class *cl = (struct qfq_class *)*arg;
401 bool existing = false;
402 struct nlattr *tb[TCA_QFQ_MAX + 1];
403 struct qfq_aggregate *new_agg = NULL;
404 u32 weight, lmax, inv_w;
405 int err;
406 int delta_w;
407
408 if (tca[TCA_OPTIONS] == NULL) {
409 pr_notice("qfq: no options\n");
410 return -EINVAL;
411 }
412
413 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy,
414 NULL);
415 if (err < 0)
416 return err;
417
418 if (tb[TCA_QFQ_WEIGHT]) {
419 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
420 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
421 pr_notice("qfq: invalid weight %u\n", weight);
422 return -EINVAL;
423 }
424 } else
425 weight = 1;
426
427 if (tb[TCA_QFQ_LMAX]) {
428 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
429 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
430 pr_notice("qfq: invalid max length %u\n", lmax);
431 return -EINVAL;
432 }
433 } else
434 lmax = psched_mtu(qdisc_dev(sch));
435
436 inv_w = ONE_FP / weight;
437 weight = ONE_FP / inv_w;
438
439 if (cl != NULL &&
440 lmax == cl->agg->lmax &&
441 weight == cl->agg->class_weight)
442 return 0;
443
444 delta_w = weight - (cl ? cl->agg->class_weight : 0);
445
446 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
447 pr_notice("qfq: total weight out of range (%d + %u)\n",
448 delta_w, q->wsum);
449 return -EINVAL;
450 }
451
452 if (cl != NULL) {
453 if (tca[TCA_RATE]) {
454 err = gen_replace_estimator(&cl->bstats, NULL,
455 &cl->rate_est,
456 NULL,
457 qdisc_root_sleeping_running(sch),
458 tca[TCA_RATE]);
459 if (err)
460 return err;
461 }
462 existing = true;
463 goto set_change_agg;
464 }
465
466
467 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
468 if (cl == NULL)
469 return -ENOBUFS;
470
471 cl->common.classid = classid;
472 cl->deficit = lmax;
473
474 cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
475 classid, NULL);
476 if (cl->qdisc == NULL)
477 cl->qdisc = &noop_qdisc;
478
479 if (tca[TCA_RATE]) {
480 err = gen_new_estimator(&cl->bstats, NULL,
481 &cl->rate_est,
482 NULL,
483 qdisc_root_sleeping_running(sch),
484 tca[TCA_RATE]);
485 if (err)
486 goto destroy_class;
487 }
488
489 if (cl->qdisc != &noop_qdisc)
490 qdisc_hash_add(cl->qdisc, true);
491 sch_tree_lock(sch);
492 qdisc_class_hash_insert(&q->clhash, &cl->common);
493 sch_tree_unlock(sch);
494
495 qdisc_class_hash_grow(sch, &q->clhash);
496
497set_change_agg:
498 sch_tree_lock(sch);
499 new_agg = qfq_find_agg(q, lmax, weight);
500 if (new_agg == NULL) {
501 sch_tree_unlock(sch);
502 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
503 if (new_agg == NULL) {
504 err = -ENOBUFS;
505 gen_kill_estimator(&cl->rate_est);
506 goto destroy_class;
507 }
508 sch_tree_lock(sch);
509 qfq_init_agg(q, new_agg, lmax, weight);
510 }
511 if (existing)
512 qfq_deact_rm_from_agg(q, cl);
513 qfq_add_to_agg(q, new_agg, cl);
514 sch_tree_unlock(sch);
515
516 *arg = (unsigned long)cl;
517 return 0;
518
519destroy_class:
520 qdisc_put(cl->qdisc);
521 kfree(cl);
522 return err;
523}
524
/* Release all resources owned by a class: detach it from its
 * aggregate, kill the rate estimator, drop the leaf qdisc and free it.
 */
static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
	struct qfq_sched *q = qdisc_priv(sch);

	qfq_rm_from_agg(q, cl);
	gen_kill_estimator(&cl->rate_est);
	qdisc_put(cl->qdisc);
	kfree(cl);
}
534
/* Delete a class, provided no filters still reference it: purge its
 * queue and unlink it from the hash under the tree lock, then free it.
 */
static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (cl->filter_cnt > 0)
		return -EBUSY;

	sch_tree_lock(sch);

	qdisc_purge_queue(cl->qdisc);
	qdisc_class_hash_remove(&q->clhash, &cl->common);

	sch_tree_unlock(sch);

	qfq_destroy_class(sch, cl);
	return 0;
}
553
554static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
555{
556 return (unsigned long)qfq_find_class(sch, classid);
557}
558
559static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
560 struct netlink_ext_ack *extack)
561{
562 struct qfq_sched *q = qdisc_priv(sch);
563
564 if (cl)
565 return NULL;
566
567 return q->block;
568}
569
570static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
571 u32 classid)
572{
573 struct qfq_class *cl = qfq_find_class(sch, classid);
574
575 if (cl != NULL)
576 cl->filter_cnt++;
577
578 return (unsigned long)cl;
579}
580
581static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
582{
583 struct qfq_class *cl = (struct qfq_class *)arg;
584
585 cl->filter_cnt--;
586}
587
588static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
589 struct Qdisc *new, struct Qdisc **old,
590 struct netlink_ext_ack *extack)
591{
592 struct qfq_class *cl = (struct qfq_class *)arg;
593
594 if (new == NULL) {
595 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
596 cl->common.classid, NULL);
597 if (new == NULL)
598 new = &noop_qdisc;
599 }
600
601 *old = qdisc_replace(sch, new, &cl->qdisc);
602 return 0;
603}
604
605static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
606{
607 struct qfq_class *cl = (struct qfq_class *)arg;
608
609 return cl->qdisc;
610}
611
/* Dump the class's parameters (weight, lmax) into a netlink message. */
static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct nlattr *nest;

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	tcm->tcm_info = cl->qdisc->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
	    nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
634
/* Dump class statistics, including QFQ-specific xstats (weight, lmax). */
static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
				struct gnet_dump *d)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct tc_qfq_stats xstats;

	memset(&xstats, 0, sizeof(xstats));

	xstats.weight = cl->agg->class_weight;
	xstats.lmax = cl->agg->lmax;

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
				  d, NULL, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    qdisc_qstats_copy(d, cl->qdisc) < 0)
		return -1;

	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
654
655static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
656{
657 struct qfq_sched *q = qdisc_priv(sch);
658 struct qfq_class *cl;
659 unsigned int i;
660
661 if (arg->stop)
662 return;
663
664 for (i = 0; i < q->clhash.hashsize; i++) {
665 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
666 if (arg->count < arg->skip) {
667 arg->count++;
668 continue;
669 }
670 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
671 arg->stop = 1;
672 return;
673 }
674 arg->count++;
675 }
676 }
677}
678
/* Classify a packet to one of our classes: first try a direct match on
 * skb->priority, then fall back to the attached tc filters.  Returns
 * NULL (with *qerr set) when the packet must be dropped or was stolen.
 */
static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct tcf_result res;
	struct tcf_proto *fl;
	int result;

	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
		pr_debug("qfq_classify: found %d\n", skb->priority);
		cl = qfq_find_class(sch, skb->priority);
		if (cl != NULL)
			return cl;
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	fl = rcu_dereference_bh(q->filter_list);
	result = tcf_classify(skb, fl, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (struct qfq_class *)res.class;
		if (cl == NULL)
			cl = qfq_find_class(sch, res.classid);
		return cl;
	}

	return NULL;
}
718
719
/* Timestamp comparison that tolerates wraparound: true iff a > b. */
static inline int qfq_gt(u64 a, u64 b)
{
	return (s64)(a - b) > 0;
}
724
725
/* Round a timestamp down to a multiple of the slot size (2^shift). */
static inline u64 qfq_round_down(u64 ts, unsigned int shift)
{
	return ts & ~((1ULL << shift) - 1);
}
730
731
/* Return the group whose index is the lowest set bit of @bitmap
 * (the bitmap must be non-zero).
 */
static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
					unsigned long bitmap)
{
	int index = __ffs(bitmap);
	return &q->groups[index];
}
738
/* Clear the bits of @bitmap below position @from. */
static inline unsigned long mask_from(unsigned long bitmap, int from)
{
	return bitmap & ~((1UL << from) - 1);
}
743
744
745
746
747
748
/*
 * Compute the state (ER/IR/EB/IB) for a group being (re)inserted into
 * the bitmaps.  Relies on ER == 0, IR == 1, EB == 2: first compute
 * eligibility by comparing grp->S with q->V, then check whether an ER
 * group at index >= grp->index with an earlier finish time blocks us.
 */
static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
{
	/* if S > V we are not eligible */
	unsigned int state = qfq_gt(grp->S, q->V);
	unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (qfq_gt(grp->F, next->F))
			state |= EB;
	}

	return state;
}
764
765
766
767
768
769
770
771
/* Move the groups selected by @mask from state bitmap @src to @dst. */
static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
				   int src, int dst)
{
	q->bitmaps[dst] |= q->bitmaps[src] & mask;
	q->bitmaps[src] &= ~mask;
}
778
/*
 * Unblock the groups below @index after the group at @index, whose old
 * finish time was @old_F, stops being served.  If a remaining ER group
 * with F <= old_F still exists at a higher index, the lower groups stay
 * blocked.
 */
static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
{
	unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (!qfq_gt(next->F, old_F))
			return; /* still blocked */
	}

	mask = (1UL << index) - 1;
	qfq_move_groups(q, mask, EB, ER);
	qfq_move_groups(q, mask, IB, IR);
}
794
795
796
797
798
799
800
801
802
803
804
/*
 * Make eligible every group whose slot boundary was crossed by the
 * advance of the virtual time from oldV to V: move those groups from
 * the ineligible bitmaps (IR/IB) to the eligible ones (ER/EB).
 */
static void qfq_make_eligible(struct qfq_sched *q)
{
	unsigned long vslot = q->V >> q->min_slot_shift;
	unsigned long old_vslot = q->oldV >> q->min_slot_shift;

	if (vslot != old_vslot) {
		unsigned long mask;
		int last_flip_pos = fls(vslot ^ old_vslot);

		if (last_flip_pos > 31) /* higher than the number of groups */
			mask = ~0UL;
		else
			mask = (1UL << last_flip_pos) - 1;

		qfq_move_groups(q, mask, IR, ER);
		qfq_move_groups(q, mask, IB, EB);
	}
}
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
/*
 * Insert an aggregate into its group's slot ring, in the slot matching
 * its rounded start time.  If the aggregate lies too far in the future
 * to be representable (more than QFQ_MAX_SLOTS - 2 slots ahead), pull
 * its timestamps back so that it fits in the last usable slot.
 */
static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
			    u64 roundedS)
{
	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
	unsigned int i; /* slot index in the bucket ring */

	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
		u64 deltaS = roundedS - grp->S -
			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
		agg->S -= deltaS;
		agg->F -= deltaS;
		slot = QFQ_MAX_SLOTS - 2;
	}

	i = (grp->front + slot) % QFQ_MAX_SLOTS;

	hlist_add_head(&agg->next, &grp->slots[i]);
	__set_bit(slot, &grp->full_slots);
}
898
899
/* Return the first aggregate in the group's front slot. */
static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
{
	return hlist_entry(grp->slots[grp->front].first,
			   struct qfq_aggregate, next);
}
905
906
907
908
/*
 * Remove the head aggregate from the group's front slot, clearing the
 * slot's bit in full_slots when the slot becomes empty.
 */
static void qfq_front_slot_remove(struct qfq_group *grp)
{
	struct qfq_aggregate *agg = qfq_slot_head(grp);

	BUG_ON(!agg);
	hlist_del(&agg->next);
	if (hlist_empty(&grp->slots[grp->front]))
		__clear_bit(0, &grp->full_slots);
}
918
919
920
921
922
923
/*
 * Return the first aggregate in the first non-empty bucket of the
 * group.  As a side effect, rotate the ring so that bucket becomes the
 * front one.  Returns NULL if the group holds no aggregate.
 */
static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
{
	unsigned int i;

	pr_debug("qfq slot_scan: grp %u full %#lx\n",
		 grp->index, grp->full_slots);

	if (grp->full_slots == 0)
		return NULL;

	i = __ffs(grp->full_slots);  /* zero based */
	if (i > 0) {
		grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
		grp->full_slots >>= i;
	}

	return qfq_slot_head(grp);
}
942
943
944
945
946
947
948
949
950
951
/*
 * Shift the group's bucket ring backwards so that its start time
 * becomes @roundedS (which must not be after grp->S): the front index
 * moves back and full_slots is shifted accordingly, without moving the
 * queued aggregates themselves.
 */
static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
{
	unsigned int i = (grp->S - roundedS) >> grp->slot_shift;

	grp->full_slots <<= i;
	grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
}
959
/*
 * If ineligible groups exist but none is eligible, jump the virtual
 * time forward to the start time of the lowest-indexed ineligible
 * group, then promote every group whose slot boundary was crossed.
 */
static void qfq_update_eligible(struct qfq_sched *q)
{
	struct qfq_group *grp;
	unsigned long ineligible;

	ineligible = q->bitmaps[IR] | q->bitmaps[IB];
	if (ineligible) {
		if (!q->bitmaps[ER]) {
			grp = qfq_ffs(q, ineligible);
			if (qfq_gt(grp->S, q->V))
				q->V = grp->S;
		}
		qfq_make_eligible(q);
	}
}
975
976
/*
 * Dequeue the head packet (already peeked by the caller, length @len)
 * of class @cl and update the class's deficit-round-robin state within
 * aggregate @agg.
 */
static void agg_dequeue(struct qfq_aggregate *agg,
			struct qfq_class *cl, unsigned int len)
{
	qdisc_dequeue_peeked(cl->qdisc);

	cl->deficit -= (int) len;

	if (cl->qdisc->q.qlen == 0) /* no more packets: leave the RR list */
		list_del(&cl->alist);
	/* NOTE(review): ops->peek() returning NULL (non-work-conserving
	 * leaf) would make qdisc_pkt_len() dereference a NULL skb here —
	 * verify against upstream hardening of this path.
	 */
	else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
		/* deficit exhausted: recharge and rotate to the back */
		cl->deficit += agg->lmax;
		list_move_tail(&cl->alist, &agg->active);
	}
}
991
/*
 * Peek at the head packet of the first active class of @agg; store the
 * class in *cl and the packet length in *len.  *len is left untouched
 * when the leaf unexpectedly yields no packet (non-work-conserving).
 */
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
					   struct qfq_class **cl,
					   unsigned int *len)
{
	struct sk_buff *skb;

	*cl = list_first_entry(&agg->active, struct qfq_class, alist);
	skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
	if (skb == NULL)
		WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
	else
		*len = qdisc_pkt_len(skb);

	return skb;
}
1007
1008
/* Update F according to the actual service received by the aggregate,
 * rather than the service it was nominally scheduled for.
 */
static inline void charge_actual_service(struct qfq_aggregate *agg)
{
	/* Compute the service received by the aggregate, capped at
	 * budgetmax: after decreasing the number of classes in agg,
	 * initial_budget - budget may transiently exceed budgetmax
	 * (see qfq_update_agg()).
	 */
	u32 service_received = min(agg->budgetmax,
				   agg->initial_budget - agg->budget);

	agg->F = agg->S + (u64)service_received * agg->inv_w;
}
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
/*
 * Assign a reasonable start time for a newly activated aggregate.
 * A rounded finish time further than one slot past V is considered
 * stale (wraparound).  If F is not stale and F >= V, set S = F.
 * Otherwise S should be V, but to preserve ordering with respect to
 * blocked groups, use the finish time of the first ER group that would
 * block us (clamped to the limit) instead.
 */
static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	unsigned long mask;
	u64 limit, roundedF;
	int slot_shift = agg->grp->slot_shift;

	roundedF = qfq_round_down(agg->F, slot_shift);
	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);

	if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
		/* timestamp was stale */
		mask = mask_from(q->bitmaps[ER], agg->grp->index);
		if (mask) {
			struct qfq_group *next = qfq_ffs(q, mask);
			if (qfq_gt(roundedF, next->F)) {
				if (qfq_gt(limit, next->F))
					agg->S = next->F;
				else /* preserve timestamp correctness */
					agg->S = limit;
				return;
			}
		}
		agg->S = q->V;
	} else  /* timestamp is not stale */
		agg->S = agg->F;
}
1060
1061
1062
1063
1064
1065
/*
 * Update the timestamps of @agg before scheduling it: on requeue the
 * aggregate restarts exactly where it finished (S = F); otherwise a
 * fresh start time is computed from the virtual time.  The new finish
 * time assumes the aggregate will consume its whole budget.
 */
static inline void
qfq_update_agg_ts(struct qfq_sched *q,
		  struct qfq_aggregate *agg, enum update_reason reason)
{
	if (reason != requeue)
		qfq_update_start(q, agg);
	else /* just charge agg for the service received */
		agg->S = agg->F;

	agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
}
1077
1078static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1079
/*
 * Dequeue the next packet from the in-service aggregate.  When the
 * aggregate has no active class left or its budget cannot cover the
 * next packet, charge it for the service actually received, recharge
 * its budget, reschedule it if still backlogged, and pick the next
 * aggregate to serve.  Finally advance the virtual time by the amount
 * of service delivered.
 */
static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
	struct qfq_class *cl;
	struct sk_buff *skb = NULL;
	/* next-packet len, 0 means no more active classes in in-service agg */
	unsigned int len = 0;

	if (in_serv_agg == NULL)
		return NULL;

	if (!list_empty(&in_serv_agg->active))
		skb = qfq_peek_skb(in_serv_agg, &cl, &len);

	/*
	 * If there are no active classes in the in-service aggregate,
	 * or if the aggregate has not enough budget to serve its next
	 * packet, then choose the next aggregate to serve.
	 */
	if (len == 0 || in_serv_agg->budget < len) {
		charge_actual_service(in_serv_agg);

		/* recharge the budget of the aggregate */
		in_serv_agg->initial_budget = in_serv_agg->budget =
			in_serv_agg->budgetmax;

		if (!list_empty(&in_serv_agg->active)) {
			/*
			 * Still backlogged: update its timestamps as a
			 * requeue and put it back into the schedule.
			 */
			qfq_update_agg_ts(q, in_serv_agg, requeue);
			qfq_schedule_agg(q, in_serv_agg);
		} else if (sch->q.qlen == 0) { /* no aggregate to serve */
			q->in_serv_agg = NULL;
			return NULL;
		}

		/*
		 * If we get here, there are other aggregates queued:
		 * choose the new aggregate to serve.
		 */
		in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
		skb = qfq_peek_skb(in_serv_agg, &cl, &len);
	}
	if (!skb)
		return NULL;

	qdisc_qstats_backlog_dec(sch, skb);
	sch->q.qlen--;
	qdisc_bstats_update(sch, skb);

	agg_dequeue(in_serv_agg, cl, len);
	/* If lmax was lowered through qfq_change_class for a class that
	 * still owns packets larger than the new lmax, the budget may
	 * not cover len: clamp at zero instead of underflowing.
	 */
	if (unlikely(in_serv_agg->budget < len))
		in_serv_agg->budget = 0;
	else
		in_serv_agg->budget -= len;

	q->V += (u64)len * q->iwsum;
	pr_debug("qfq dequeue: len %u F %lld now %lld\n",
		 len, (unsigned long long) in_serv_agg->F,
		 (unsigned long long) q->V);

	return skb;
}
1156
/*
 * Select the next aggregate to serve: the head of the lowest-indexed
 * ER group.  Removes it from its slot and repairs the group's
 * timestamps, state bitmaps and the blocked groups below it.
 * Returns NULL if no group is eligible and ready.
 */
static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
{
	struct qfq_group *grp;
	struct qfq_aggregate *agg, *new_front_agg;
	u64 old_F;

	qfq_update_eligible(q);
	q->oldV = q->V;

	if (!q->bitmaps[ER])
		return NULL;

	grp = qfq_ffs(q, q->bitmaps[ER]);
	old_F = grp->F;

	agg = qfq_slot_head(grp);

	/* agg starts to be served: remove it from the schedule */
	qfq_front_slot_remove(grp);

	new_front_agg = qfq_slot_scan(grp);

	if (new_front_agg == NULL) /* group is now inactive, remove from ER */
		__clear_bit(grp->index, &q->bitmaps[ER]);
	else {
		/* adjust the group timestamps to the new front aggregate */
		u64 roundedS = qfq_round_down(new_front_agg->S,
					      grp->slot_shift);
		unsigned int s;

		if (grp->S == roundedS)
			return agg;
		grp->S = roundedS;
		grp->F = roundedS + (2ULL << grp->slot_shift);
		__clear_bit(grp->index, &q->bitmaps[ER]);
		s = qfq_calc_state(q, grp);
		__set_bit(grp->index, &q->bitmaps[s]);
	}

	qfq_unblock_groups(q, grp->index, old_F);

	return agg;
}
1199
/*
 * Enqueue a packet: classify it to a class, enlarge the class's
 * aggregate if the packet exceeds its lmax, enqueue to the leaf qdisc,
 * and activate the class/aggregate if it just became backlogged.
 */
static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	unsigned int len = qdisc_pkt_len(skb), gso_segs;
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct qfq_aggregate *agg;
	int err = 0;
	bool first;

	cl = qfq_classify(skb, sch, &err);
	if (cl == NULL) {
		if (err & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return err;
	}
	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);

	if (unlikely(cl->agg->lmax < len)) {
		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
			 cl->agg->lmax, len, cl->common.classid);
		err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
		if (err) { /* out of memory: drop the packet instead */
			cl->qstats.drops++;
			return qdisc_drop(skb, sch, to_free);
		}
	}

	gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	first = !cl->qdisc->q.qlen;
	err = qdisc_enqueue(skb, cl->qdisc, to_free);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			qdisc_qstats_drop(sch);
		}
		return err;
	}

	cl->bstats.bytes += len;
	cl->bstats.packets += gso_segs;
	sch->qstats.backlog += len;
	++sch->q.qlen;

	agg = cl->agg;
	/* if the class was already backlogged, it stays scheduled */
	if (!first) {
		/* If this packet became the new head while the class sits
		 * at the front of the aggregate's RR list with an
		 * insufficient deficit, rotate the class to the back.
		 */
		if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
		    list_first_entry(&agg->active, struct qfq_class, alist)
		    == cl && cl->deficit < len)
			list_move_tail(&cl->alist, &agg->active);

		return err;
	}

	/* schedule class for service within the aggregate */
	cl->deficit = agg->lmax;
	list_add_tail(&cl->alist, &agg->active);

	if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
	    q->in_serv_agg == agg)
		return err; /* non-empty or in service, nothing else to do */

	qfq_activate_agg(q, agg, enqueue);

	return err;
}
1269
1270
1271
1272
/*
 * Schedule an aggregate within its group: compute the rounded start
 * time, update the group's timestamps and state bitmaps if needed, and
 * insert the aggregate into the proper slot.
 */
static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	struct qfq_group *grp = agg->grp;
	u64 roundedS;
	int s;

	roundedS = qfq_round_down(agg->S, grp->slot_shift);

	/*
	 * Insert agg in the correct bucket.
	 * If agg->S >= grp->S we don't need to update grp->S, and the
	 * group state stays valid: just insert in the correct slot.
	 * Otherwise grp->S is decreasing: make room in the bucket ring
	 * and recompute the group state.
	 */
	if (grp->full_slots) {
		if (!qfq_gt(grp->S, agg->S))
			goto skip_update;

		/* create a slot for this agg->S */
		qfq_slot_rotate(grp, roundedS);
		/* group timestamps are changing: drop ineligible states */
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[IB]);
	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
		   q->in_serv_agg == NULL)
		q->V = roundedS;

	grp->S = roundedS;
	grp->F = roundedS + (2ULL << grp->slot_shift);
	s = qfq_calc_state(q, grp);
	__set_bit(grp->index, &q->bitmaps[s]);

	pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
		 s, q->bitmaps[s],
		 (unsigned long long) agg->S,
		 (unsigned long long) agg->F,
		 (unsigned long long) q->V);

skip_update:
	qfq_slot_insert(grp, agg, roundedS);
}
1317
1318
1319
/* Update agg timestamps and schedule agg for service. */
static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
			     enum update_reason reason)
{
	agg->initial_budget = agg->budget = agg->budgetmax; /* recharge */

	qfq_update_agg_ts(q, agg, reason);
	if (q->in_serv_agg == NULL) { /* no aggregate in service: serve it */
		q->in_serv_agg = agg;
		/* start the virtual time from this aggregate's start */
		q->oldV = q->V = agg->S;
	} else if (agg != q->in_serv_agg)
		qfq_schedule_agg(q, agg);
}
1333
/*
 * Remove an aggregate from its group's slot ring (not necessarily the
 * front slot); clear the slot's bit in full_slots if it becomes empty.
 */
static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
			    struct qfq_aggregate *agg)
{
	unsigned int i, offset;
	u64 roundedS;

	roundedS = qfq_round_down(agg->S, grp->slot_shift);
	offset = (roundedS - grp->S) >> grp->slot_shift;

	i = (grp->front + offset) % QFQ_MAX_SLOTS;

	hlist_del(&agg->next);
	if (hlist_empty(&grp->slots[i]))
		__clear_bit(offset, &grp->full_slots);
}
1349
1350
1351
1352
1353
1354
1355
1356
/*
 * Deactivate an aggregate that is no longer backlogged.  If it is the
 * one in service, charge it for the service received and pick the next
 * aggregate; otherwise remove it from its group's slot and repair the
 * group's timestamps and state bitmaps.
 */
static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	struct qfq_group *grp = agg->grp;
	unsigned long mask;
	u64 roundedS;
	int s;

	if (agg == q->in_serv_agg) {
		charge_actual_service(agg);
		q->in_serv_agg = qfq_choose_next_agg(q);
		return;
	}

	agg->F = agg->S;
	qfq_slot_remove(q, grp, agg);

	if (!grp->full_slots) {
		/* group is now empty: drop it from every ineligible state */
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[EB]);
		__clear_bit(grp->index, &q->bitmaps[IB]);

		if (test_bit(grp->index, &q->bitmaps[ER]) &&
		    !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
			/* unblock the groups below this one if possible */
			mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
			if (mask)
				mask = ~((1UL << __fls(mask)) - 1);
			else
				mask = ~0UL;
			qfq_move_groups(q, mask, EB, ER);
			qfq_move_groups(q, mask, IB, IR);
		}
		__clear_bit(grp->index, &q->bitmaps[ER]);
	} else if (hlist_empty(&grp->slots[grp->front])) {
		/* the removed agg was the front one: re-anchor the group
		 * on the new first non-empty slot
		 */
		agg = qfq_slot_scan(grp);
		roundedS = qfq_round_down(agg->S, grp->slot_shift);
		if (grp->S != roundedS) {
			__clear_bit(grp->index, &q->bitmaps[ER]);
			__clear_bit(grp->index, &q->bitmaps[IR]);
			__clear_bit(grp->index, &q->bitmaps[EB]);
			__clear_bit(grp->index, &q->bitmaps[IB]);
			grp->S = roundedS;
			grp->F = roundedS + (2ULL << grp->slot_shift);
			s = qfq_calc_state(q, grp);
			__set_bit(grp->index, &q->bitmaps[s]);
		}
	}
}
1404
/* Called by the core when a class's leaf queue becomes empty. */
static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	qfq_deactivate_class(qdisc_priv(sch), (struct qfq_class *)arg);
}
1412
/*
 * Initialise scheduler state: filter block, class hash, the group
 * array, and the maximum number of classes per aggregate — the smaller
 * of QFQ_MAX_AGG_CLASSES and tx_queue_len + 1, rounded down to a power
 * of two.
 */
static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
			  struct netlink_ext_ack *extack)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	int i, j, err;
	u32 max_cl_shift, maxbudg_shift, max_classes;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;

	if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
		max_classes = QFQ_MAX_AGG_CLASSES;
	else
		max_classes = qdisc_dev(sch)->tx_queue_len + 1;

	max_cl_shift = __fls(max_classes);
	q->max_agg_classes = 1<<max_cl_shift;

	/* The min slot shift derives from the maximum aggregate size:
	 * max packet size times max classes per aggregate.
	 */
	maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
	q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;

	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		grp = &q->groups[i];
		grp->index = i;
		grp->slot_shift = q->min_slot_shift + i;
		for (j = 0; j < QFQ_MAX_SLOTS; j++)
			INIT_HLIST_HEAD(&grp->slots[j]);
	}

	INIT_HLIST_HEAD(&q->nonfull_aggs);

	return 0;
}
1453
/* Reset to the initial state: deactivate every backlogged class,
 * reset all leaf qdiscs and clear the qdisc-level counters.
 */
static void qfq_reset_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->qdisc->q.qlen > 0) /* class is active */
				qfq_deactivate_class(q, cl);

			qdisc_reset(cl->qdisc);
		}
	}
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
}
1471
/* Tear down the qdisc: release the filter block, then destroy every
 * class and the class hash.
 */
static void qfq_destroy_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct hlist_node *next;
	unsigned int i;

	tcf_block_put(q->block);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
					  common.hnode) {
			qfq_destroy_class(sch, cl);
		}
	}
	qdisc_class_hash_destroy(&q->clhash);
}
1489
/* Class operations exported to the qdisc core. */
static const struct Qdisc_class_ops qfq_class_ops = {
	.change = qfq_change_class,
	.delete = qfq_delete_class,
	.find = qfq_search_class,
	.tcf_block = qfq_tcf_block,
	.bind_tcf = qfq_bind_tcf,
	.unbind_tcf = qfq_unbind_tcf,
	.graft = qfq_graft_class,
	.leaf = qfq_class_leaf,
	.qlen_notify = qfq_qlen_notify,
	.dump = qfq_dump_class,
	.dump_stats = qfq_dump_class_stats,
	.walk = qfq_walk,
};
1504
/* Qdisc operations for the "qfq" scheduler. */
static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
	.cl_ops = &qfq_class_ops,
	.id = "qfq",
	.priv_size = sizeof(struct qfq_sched),
	.enqueue = qfq_enqueue,
	.dequeue = qfq_dequeue,
	.peek = qdisc_peek_dequeued,
	.init = qfq_init_qdisc,
	.reset = qfq_reset_qdisc,
	.destroy = qfq_destroy_qdisc,
	.owner = THIS_MODULE,
};
1517
/* Module init: register the QFQ qdisc with the scheduler core. */
static int __init qfq_init(void)
{
	return register_qdisc(&qfq_qdisc_ops);
}
1522
/* Module exit: unregister the QFQ qdisc. */
static void __exit qfq_exit(void)
{
	unregister_qdisc(&qfq_qdisc_ops);
}
1527
1528module_init(qfq_init);
1529module_exit(qfq_exit);
1530MODULE_LICENSE("GPL");
1531