1
2
3
4
5
6
7
8
9
10
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/bitops.h>
15#include <linux/errno.h>
16#include <linux/netdevice.h>
17#include <linux/pkt_sched.h>
18#include <net/sch_generic.h>
19#include <net/pkt_sched.h>
20#include <net/pkt_cls.h>
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97#define QFQ_MAX_SLOTS 32
98
99
100
101
102
103
104
105
106
107
108#define QFQ_MAX_INDEX 24
109#define QFQ_MAX_WSHIFT 10
110
111#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
112#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
113
114#define FRAC_BITS 30
115#define ONE_FP (1UL << FRAC_BITS)
116
117#define QFQ_MTU_SHIFT 16
118#define QFQ_MIN_LMAX 512
119
120#define QFQ_MAX_AGG_CLASSES 8
121
122
123
124
125
126enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
127
128struct qfq_group;
129
130struct qfq_aggregate;
131
132struct qfq_class {
133 struct Qdisc_class_common common;
134
135 unsigned int filter_cnt;
136
137 struct gnet_stats_basic_packed bstats;
138 struct gnet_stats_queue qstats;
139 struct net_rate_estimator __rcu *rate_est;
140 struct Qdisc *qdisc;
141 struct list_head alist;
142 struct qfq_aggregate *agg;
143 int deficit;
144};
145
146struct qfq_aggregate {
147 struct hlist_node next;
148 u64 S, F;
149
150
151
152
153
154 struct qfq_group *grp;
155
156
157 u32 class_weight;
158
159 int lmax;
160
161 u32 inv_w;
162 u32 budgetmax;
163 u32 initial_budget, budget;
164
165 int num_classes;
166 struct list_head active;
167
168 struct hlist_node nonfull_next;
169};
170
171struct qfq_group {
172 u64 S, F;
173 unsigned int slot_shift;
174 unsigned int index;
175 unsigned int front;
176 unsigned long full_slots;
177
178
179 struct hlist_head slots[QFQ_MAX_SLOTS];
180};
181
182struct qfq_sched {
183 struct tcf_proto __rcu *filter_list;
184 struct tcf_block *block;
185 struct Qdisc_class_hash clhash;
186
187 u64 oldV, V;
188 struct qfq_aggregate *in_serv_agg;
189 u32 wsum;
190 u32 iwsum;
191
192 unsigned long bitmaps[QFQ_MAX_STATE];
193 struct qfq_group groups[QFQ_MAX_INDEX + 1];
194 u32 min_slot_shift;
195
196 u32 max_agg_classes;
197 struct hlist_head nonfull_aggs;
198};
199
200
201
202
203
204
205
206
207enum update_reason {enqueue, requeue};
208
209static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
210{
211 struct qfq_sched *q = qdisc_priv(sch);
212 struct Qdisc_class_common *clc;
213
214 clc = qdisc_class_find(&q->clhash, classid);
215 if (clc == NULL)
216 return NULL;
217 return container_of(clc, struct qfq_class, common);
218}
219
220static void qfq_purge_queue(struct qfq_class *cl)
221{
222 unsigned int len = cl->qdisc->q.qlen;
223 unsigned int backlog = cl->qdisc->qstats.backlog;
224
225 qdisc_reset(cl->qdisc);
226 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
227}
228
229static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
230 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
231 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
232};
233
234
235
236
237
238
239static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
240{
241 u64 slot_size = (u64)maxlen * inv_w;
242 unsigned long size_map;
243 int index = 0;
244
245 size_map = slot_size >> min_slot_shift;
246 if (!size_map)
247 goto out;
248
249 index = __fls(size_map) + 1;
250 index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
251
252 if (index < 0)
253 index = 0;
254out:
255 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
256 (unsigned long) ONE_FP/inv_w, maxlen, index);
257
258 return index;
259}
260
261static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
262static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
263 enum update_reason);
264
265static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
266 u32 lmax, u32 weight)
267{
268 INIT_LIST_HEAD(&agg->active);
269 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
270
271 agg->lmax = lmax;
272 agg->class_weight = weight;
273}
274
275static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
276 u32 lmax, u32 weight)
277{
278 struct qfq_aggregate *agg;
279
280 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
281 if (agg->lmax == lmax && agg->class_weight == weight)
282 return agg;
283
284 return NULL;
285}
286
287
288
289static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
290 int new_num_classes)
291{
292 u32 new_agg_weight;
293
294 if (new_num_classes == q->max_agg_classes)
295 hlist_del_init(&agg->nonfull_next);
296
297 if (agg->num_classes > new_num_classes &&
298 new_num_classes == q->max_agg_classes - 1)
299 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
300
301
302
303
304
305 agg->budgetmax = new_num_classes * agg->lmax;
306 new_agg_weight = agg->class_weight * new_num_classes;
307 agg->inv_w = ONE_FP/new_agg_weight;
308
309 if (agg->grp == NULL) {
310 int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
311 q->min_slot_shift);
312 agg->grp = &q->groups[i];
313 }
314
315 q->wsum +=
316 (int) agg->class_weight * (new_num_classes - agg->num_classes);
317 q->iwsum = ONE_FP / q->wsum;
318
319 agg->num_classes = new_num_classes;
320}
321
322
323static void qfq_add_to_agg(struct qfq_sched *q,
324 struct qfq_aggregate *agg,
325 struct qfq_class *cl)
326{
327 cl->agg = agg;
328
329 qfq_update_agg(q, agg, agg->num_classes+1);
330 if (cl->qdisc->q.qlen > 0) {
331 list_add_tail(&cl->alist, &agg->active);
332 if (list_first_entry(&agg->active, struct qfq_class, alist) ==
333 cl && q->in_serv_agg != agg)
334 qfq_activate_agg(q, agg, enqueue);
335 }
336}
337
338static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
339
340static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
341{
342 hlist_del_init(&agg->nonfull_next);
343 q->wsum -= agg->class_weight;
344 if (q->wsum != 0)
345 q->iwsum = ONE_FP / q->wsum;
346
347 if (q->in_serv_agg == agg)
348 q->in_serv_agg = qfq_choose_next_agg(q);
349 kfree(agg);
350}
351
352
353static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
354{
355 struct qfq_aggregate *agg = cl->agg;
356
357
358 list_del(&cl->alist);
359 if (list_empty(&agg->active))
360 qfq_deactivate_agg(q, agg);
361}
362
363
364static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
365{
366 struct qfq_aggregate *agg = cl->agg;
367
368 cl->agg = NULL;
369 if (agg->num_classes == 1) {
370 qfq_destroy_agg(q, agg);
371 return;
372 }
373 qfq_update_agg(q, agg, agg->num_classes-1);
374}
375
376
377static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
378{
379 if (cl->qdisc->q.qlen > 0)
380 qfq_deactivate_class(q, cl);
381
382 qfq_rm_from_agg(q, cl);
383}
384
385
386static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
387 u32 lmax)
388{
389 struct qfq_sched *q = qdisc_priv(sch);
390 struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
391
392 if (new_agg == NULL) {
393 new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
394 if (new_agg == NULL)
395 return -ENOBUFS;
396 qfq_init_agg(q, new_agg, lmax, weight);
397 }
398 qfq_deact_rm_from_agg(q, cl);
399 qfq_add_to_agg(q, new_agg, cl);
400
401 return 0;
402}
403
404static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
405 struct nlattr **tca, unsigned long *arg,
406 struct netlink_ext_ack *extack)
407{
408 struct qfq_sched *q = qdisc_priv(sch);
409 struct qfq_class *cl = (struct qfq_class *)*arg;
410 bool existing = false;
411 struct nlattr *tb[TCA_QFQ_MAX + 1];
412 struct qfq_aggregate *new_agg = NULL;
413 u32 weight, lmax, inv_w;
414 int err;
415 int delta_w;
416
417 if (tca[TCA_OPTIONS] == NULL) {
418 pr_notice("qfq: no options\n");
419 return -EINVAL;
420 }
421
422 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy,
423 NULL);
424 if (err < 0)
425 return err;
426
427 if (tb[TCA_QFQ_WEIGHT]) {
428 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
429 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
430 pr_notice("qfq: invalid weight %u\n", weight);
431 return -EINVAL;
432 }
433 } else
434 weight = 1;
435
436 if (tb[TCA_QFQ_LMAX]) {
437 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
438 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
439 pr_notice("qfq: invalid max length %u\n", lmax);
440 return -EINVAL;
441 }
442 } else
443 lmax = psched_mtu(qdisc_dev(sch));
444
445 inv_w = ONE_FP / weight;
446 weight = ONE_FP / inv_w;
447
448 if (cl != NULL &&
449 lmax == cl->agg->lmax &&
450 weight == cl->agg->class_weight)
451 return 0;
452
453 delta_w = weight - (cl ? cl->agg->class_weight : 0);
454
455 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
456 pr_notice("qfq: total weight out of range (%d + %u)\n",
457 delta_w, q->wsum);
458 return -EINVAL;
459 }
460
461 if (cl != NULL) {
462 if (tca[TCA_RATE]) {
463 err = gen_replace_estimator(&cl->bstats, NULL,
464 &cl->rate_est,
465 NULL,
466 qdisc_root_sleeping_running(sch),
467 tca[TCA_RATE]);
468 if (err)
469 return err;
470 }
471 existing = true;
472 goto set_change_agg;
473 }
474
475
476 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
477 if (cl == NULL)
478 return -ENOBUFS;
479
480 cl->common.classid = classid;
481 cl->deficit = lmax;
482
483 cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
484 classid, NULL);
485 if (cl->qdisc == NULL)
486 cl->qdisc = &noop_qdisc;
487
488 if (tca[TCA_RATE]) {
489 err = gen_new_estimator(&cl->bstats, NULL,
490 &cl->rate_est,
491 NULL,
492 qdisc_root_sleeping_running(sch),
493 tca[TCA_RATE]);
494 if (err)
495 goto destroy_class;
496 }
497
498 if (cl->qdisc != &noop_qdisc)
499 qdisc_hash_add(cl->qdisc, true);
500 sch_tree_lock(sch);
501 qdisc_class_hash_insert(&q->clhash, &cl->common);
502 sch_tree_unlock(sch);
503
504 qdisc_class_hash_grow(sch, &q->clhash);
505
506set_change_agg:
507 sch_tree_lock(sch);
508 new_agg = qfq_find_agg(q, lmax, weight);
509 if (new_agg == NULL) {
510 sch_tree_unlock(sch);
511 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
512 if (new_agg == NULL) {
513 err = -ENOBUFS;
514 gen_kill_estimator(&cl->rate_est);
515 goto destroy_class;
516 }
517 sch_tree_lock(sch);
518 qfq_init_agg(q, new_agg, lmax, weight);
519 }
520 if (existing)
521 qfq_deact_rm_from_agg(q, cl);
522 qfq_add_to_agg(q, new_agg, cl);
523 sch_tree_unlock(sch);
524
525 *arg = (unsigned long)cl;
526 return 0;
527
528destroy_class:
529 qdisc_destroy(cl->qdisc);
530 kfree(cl);
531 return err;
532}
533
534static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
535{
536 struct qfq_sched *q = qdisc_priv(sch);
537
538 qfq_rm_from_agg(q, cl);
539 gen_kill_estimator(&cl->rate_est);
540 qdisc_destroy(cl->qdisc);
541 kfree(cl);
542}
543
544static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
545{
546 struct qfq_sched *q = qdisc_priv(sch);
547 struct qfq_class *cl = (struct qfq_class *)arg;
548
549 if (cl->filter_cnt > 0)
550 return -EBUSY;
551
552 sch_tree_lock(sch);
553
554 qfq_purge_queue(cl);
555 qdisc_class_hash_remove(&q->clhash, &cl->common);
556
557 sch_tree_unlock(sch);
558
559 qfq_destroy_class(sch, cl);
560 return 0;
561}
562
563static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
564{
565 return (unsigned long)qfq_find_class(sch, classid);
566}
567
568static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
569 struct netlink_ext_ack *extack)
570{
571 struct qfq_sched *q = qdisc_priv(sch);
572
573 if (cl)
574 return NULL;
575
576 return q->block;
577}
578
579static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
580 u32 classid)
581{
582 struct qfq_class *cl = qfq_find_class(sch, classid);
583
584 if (cl != NULL)
585 cl->filter_cnt++;
586
587 return (unsigned long)cl;
588}
589
590static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
591{
592 struct qfq_class *cl = (struct qfq_class *)arg;
593
594 cl->filter_cnt--;
595}
596
597static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
598 struct Qdisc *new, struct Qdisc **old,
599 struct netlink_ext_ack *extack)
600{
601 struct qfq_class *cl = (struct qfq_class *)arg;
602
603 if (new == NULL) {
604 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
605 cl->common.classid, NULL);
606 if (new == NULL)
607 new = &noop_qdisc;
608 }
609
610 *old = qdisc_replace(sch, new, &cl->qdisc);
611 return 0;
612}
613
614static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
615{
616 struct qfq_class *cl = (struct qfq_class *)arg;
617
618 return cl->qdisc;
619}
620
621static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
622 struct sk_buff *skb, struct tcmsg *tcm)
623{
624 struct qfq_class *cl = (struct qfq_class *)arg;
625 struct nlattr *nest;
626
627 tcm->tcm_parent = TC_H_ROOT;
628 tcm->tcm_handle = cl->common.classid;
629 tcm->tcm_info = cl->qdisc->handle;
630
631 nest = nla_nest_start(skb, TCA_OPTIONS);
632 if (nest == NULL)
633 goto nla_put_failure;
634 if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
635 nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
636 goto nla_put_failure;
637 return nla_nest_end(skb, nest);
638
639nla_put_failure:
640 nla_nest_cancel(skb, nest);
641 return -EMSGSIZE;
642}
643
644static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
645 struct gnet_dump *d)
646{
647 struct qfq_class *cl = (struct qfq_class *)arg;
648 struct tc_qfq_stats xstats;
649
650 memset(&xstats, 0, sizeof(xstats));
651
652 xstats.weight = cl->agg->class_weight;
653 xstats.lmax = cl->agg->lmax;
654
655 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
656 d, NULL, &cl->bstats) < 0 ||
657 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
658 gnet_stats_copy_queue(d, NULL,
659 &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
660 return -1;
661
662 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
663}
664
665static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
666{
667 struct qfq_sched *q = qdisc_priv(sch);
668 struct qfq_class *cl;
669 unsigned int i;
670
671 if (arg->stop)
672 return;
673
674 for (i = 0; i < q->clhash.hashsize; i++) {
675 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
676 if (arg->count < arg->skip) {
677 arg->count++;
678 continue;
679 }
680 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
681 arg->stop = 1;
682 return;
683 }
684 arg->count++;
685 }
686 }
687}
688
689static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
690 int *qerr)
691{
692 struct qfq_sched *q = qdisc_priv(sch);
693 struct qfq_class *cl;
694 struct tcf_result res;
695 struct tcf_proto *fl;
696 int result;
697
698 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
699 pr_debug("qfq_classify: found %d\n", skb->priority);
700 cl = qfq_find_class(sch, skb->priority);
701 if (cl != NULL)
702 return cl;
703 }
704
705 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
706 fl = rcu_dereference_bh(q->filter_list);
707 result = tcf_classify(skb, fl, &res, false);
708 if (result >= 0) {
709#ifdef CONFIG_NET_CLS_ACT
710 switch (result) {
711 case TC_ACT_QUEUED:
712 case TC_ACT_STOLEN:
713 case TC_ACT_TRAP:
714 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
715
716 case TC_ACT_SHOT:
717 return NULL;
718 }
719#endif
720 cl = (struct qfq_class *)res.class;
721 if (cl == NULL)
722 cl = qfq_find_class(sch, res.classid);
723 return cl;
724 }
725
726 return NULL;
727}
728
729
730static inline int qfq_gt(u64 a, u64 b)
731{
732 return (s64)(a - b) > 0;
733}
734
735
736static inline u64 qfq_round_down(u64 ts, unsigned int shift)
737{
738 return ts & ~((1ULL << shift) - 1);
739}
740
741
742static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
743 unsigned long bitmap)
744{
745 int index = __ffs(bitmap);
746 return &q->groups[index];
747}
748
749static inline unsigned long mask_from(unsigned long bitmap, int from)
750{
751 return bitmap & ~((1UL << from) - 1);
752}
753
754
755
756
757
758
759static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
760{
761
762 unsigned int state = qfq_gt(grp->S, q->V);
763 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
764 struct qfq_group *next;
765
766 if (mask) {
767 next = qfq_ffs(q, mask);
768 if (qfq_gt(grp->F, next->F))
769 state |= EB;
770 }
771
772 return state;
773}
774
775
776
777
778
779
780
781
782static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
783 int src, int dst)
784{
785 q->bitmaps[dst] |= q->bitmaps[src] & mask;
786 q->bitmaps[src] &= ~mask;
787}
788
789static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
790{
791 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
792 struct qfq_group *next;
793
794 if (mask) {
795 next = qfq_ffs(q, mask);
796 if (!qfq_gt(next->F, old_F))
797 return;
798 }
799
800 mask = (1UL << index) - 1;
801 qfq_move_groups(q, mask, EB, ER);
802 qfq_move_groups(q, mask, IB, IR);
803}
804
805
806
807
808
809
810
811
812
813
814
815static void qfq_make_eligible(struct qfq_sched *q)
816{
817 unsigned long vslot = q->V >> q->min_slot_shift;
818 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
819
820 if (vslot != old_vslot) {
821 unsigned long mask;
822 int last_flip_pos = fls(vslot ^ old_vslot);
823
824 if (last_flip_pos > 31)
825 mask = ~0UL;
826 else
827 mask = (1UL << last_flip_pos) - 1;
828
829 qfq_move_groups(q, mask, IR, ER);
830 qfq_move_groups(q, mask, IB, EB);
831 }
832}
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
890 u64 roundedS)
891{
892 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
893 unsigned int i;
894
895 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
896 u64 deltaS = roundedS - grp->S -
897 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
898 agg->S -= deltaS;
899 agg->F -= deltaS;
900 slot = QFQ_MAX_SLOTS - 2;
901 }
902
903 i = (grp->front + slot) % QFQ_MAX_SLOTS;
904
905 hlist_add_head(&agg->next, &grp->slots[i]);
906 __set_bit(slot, &grp->full_slots);
907}
908
909
910static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
911{
912 return hlist_entry(grp->slots[grp->front].first,
913 struct qfq_aggregate, next);
914}
915
916
917
918
919static void qfq_front_slot_remove(struct qfq_group *grp)
920{
921 struct qfq_aggregate *agg = qfq_slot_head(grp);
922
923 BUG_ON(!agg);
924 hlist_del(&agg->next);
925 if (hlist_empty(&grp->slots[grp->front]))
926 __clear_bit(0, &grp->full_slots);
927}
928
929
930
931
932
933
934static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
935{
936 unsigned int i;
937
938 pr_debug("qfq slot_scan: grp %u full %#lx\n",
939 grp->index, grp->full_slots);
940
941 if (grp->full_slots == 0)
942 return NULL;
943
944 i = __ffs(grp->full_slots);
945 if (i > 0) {
946 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
947 grp->full_slots >>= i;
948 }
949
950 return qfq_slot_head(grp);
951}
952
953
954
955
956
957
958
959
960
961
962static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
963{
964 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
965
966 grp->full_slots <<= i;
967 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
968}
969
970static void qfq_update_eligible(struct qfq_sched *q)
971{
972 struct qfq_group *grp;
973 unsigned long ineligible;
974
975 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
976 if (ineligible) {
977 if (!q->bitmaps[ER]) {
978 grp = qfq_ffs(q, ineligible);
979 if (qfq_gt(grp->S, q->V))
980 q->V = grp->S;
981 }
982 qfq_make_eligible(q);
983 }
984}
985
986
987static void agg_dequeue(struct qfq_aggregate *agg,
988 struct qfq_class *cl, unsigned int len)
989{
990 qdisc_dequeue_peeked(cl->qdisc);
991
992 cl->deficit -= (int) len;
993
994 if (cl->qdisc->q.qlen == 0)
995 list_del(&cl->alist);
996 else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
997 cl->deficit += agg->lmax;
998 list_move_tail(&cl->alist, &agg->active);
999 }
1000}
1001
1002static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
1003 struct qfq_class **cl,
1004 unsigned int *len)
1005{
1006 struct sk_buff *skb;
1007
1008 *cl = list_first_entry(&agg->active, struct qfq_class, alist);
1009 skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
1010 if (skb == NULL)
1011 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
1012 else
1013 *len = qdisc_pkt_len(skb);
1014
1015 return skb;
1016}
1017
1018
1019static inline void charge_actual_service(struct qfq_aggregate *agg)
1020{
1021
1022
1023
1024
1025
1026 u32 service_received = min(agg->budgetmax,
1027 agg->initial_budget - agg->budget);
1028
1029 agg->F = agg->S + (u64)service_received * agg->inv_w;
1030}
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1045{
1046 unsigned long mask;
1047 u64 limit, roundedF;
1048 int slot_shift = agg->grp->slot_shift;
1049
1050 roundedF = qfq_round_down(agg->F, slot_shift);
1051 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1052
1053 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1054
1055 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1056 if (mask) {
1057 struct qfq_group *next = qfq_ffs(q, mask);
1058 if (qfq_gt(roundedF, next->F)) {
1059 if (qfq_gt(limit, next->F))
1060 agg->S = next->F;
1061 else
1062 agg->S = limit;
1063 return;
1064 }
1065 }
1066 agg->S = q->V;
1067 } else
1068 agg->S = agg->F;
1069}
1070
1071
1072
1073
1074
1075
1076static inline void
1077qfq_update_agg_ts(struct qfq_sched *q,
1078 struct qfq_aggregate *agg, enum update_reason reason)
1079{
1080 if (reason != requeue)
1081 qfq_update_start(q, agg);
1082 else
1083 agg->S = agg->F;
1084
1085 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1086}
1087
1088static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1089
1090static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1091{
1092 struct qfq_sched *q = qdisc_priv(sch);
1093 struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
1094 struct qfq_class *cl;
1095 struct sk_buff *skb = NULL;
1096
1097 unsigned int len = 0;
1098
1099 if (in_serv_agg == NULL)
1100 return NULL;
1101
1102 if (!list_empty(&in_serv_agg->active))
1103 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1104
1105
1106
1107
1108
1109
1110 if (len == 0 || in_serv_agg->budget < len) {
1111 charge_actual_service(in_serv_agg);
1112
1113
1114 in_serv_agg->initial_budget = in_serv_agg->budget =
1115 in_serv_agg->budgetmax;
1116
1117 if (!list_empty(&in_serv_agg->active)) {
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128 qfq_update_agg_ts(q, in_serv_agg, requeue);
1129 qfq_schedule_agg(q, in_serv_agg);
1130 } else if (sch->q.qlen == 0) {
1131 q->in_serv_agg = NULL;
1132 return NULL;
1133 }
1134
1135
1136
1137
1138
1139 in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
1140 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1141 }
1142 if (!skb)
1143 return NULL;
1144
1145 qdisc_qstats_backlog_dec(sch, skb);
1146 sch->q.qlen--;
1147 qdisc_bstats_update(sch, skb);
1148
1149 agg_dequeue(in_serv_agg, cl, len);
1150
1151
1152
1153
1154 if (unlikely(in_serv_agg->budget < len))
1155 in_serv_agg->budget = 0;
1156 else
1157 in_serv_agg->budget -= len;
1158
1159 q->V += (u64)len * q->iwsum;
1160 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1161 len, (unsigned long long) in_serv_agg->F,
1162 (unsigned long long) q->V);
1163
1164 return skb;
1165}
1166
1167static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1168{
1169 struct qfq_group *grp;
1170 struct qfq_aggregate *agg, *new_front_agg;
1171 u64 old_F;
1172
1173 qfq_update_eligible(q);
1174 q->oldV = q->V;
1175
1176 if (!q->bitmaps[ER])
1177 return NULL;
1178
1179 grp = qfq_ffs(q, q->bitmaps[ER]);
1180 old_F = grp->F;
1181
1182 agg = qfq_slot_head(grp);
1183
1184
1185 qfq_front_slot_remove(grp);
1186
1187 new_front_agg = qfq_slot_scan(grp);
1188
1189 if (new_front_agg == NULL)
1190 __clear_bit(grp->index, &q->bitmaps[ER]);
1191 else {
1192 u64 roundedS = qfq_round_down(new_front_agg->S,
1193 grp->slot_shift);
1194 unsigned int s;
1195
1196 if (grp->S == roundedS)
1197 return agg;
1198 grp->S = roundedS;
1199 grp->F = roundedS + (2ULL << grp->slot_shift);
1200 __clear_bit(grp->index, &q->bitmaps[ER]);
1201 s = qfq_calc_state(q, grp);
1202 __set_bit(grp->index, &q->bitmaps[s]);
1203 }
1204
1205 qfq_unblock_groups(q, grp->index, old_F);
1206
1207 return agg;
1208}
1209
1210static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1211 struct sk_buff **to_free)
1212{
1213 struct qfq_sched *q = qdisc_priv(sch);
1214 struct qfq_class *cl;
1215 struct qfq_aggregate *agg;
1216 int err = 0;
1217
1218 cl = qfq_classify(skb, sch, &err);
1219 if (cl == NULL) {
1220 if (err & __NET_XMIT_BYPASS)
1221 qdisc_qstats_drop(sch);
1222 __qdisc_drop(skb, to_free);
1223 return err;
1224 }
1225 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
1226
1227 if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) {
1228 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
1229 cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid);
1230 err = qfq_change_agg(sch, cl, cl->agg->class_weight,
1231 qdisc_pkt_len(skb));
1232 if (err) {
1233 cl->qstats.drops++;
1234 return qdisc_drop(skb, sch, to_free);
1235 }
1236 }
1237
1238 err = qdisc_enqueue(skb, cl->qdisc, to_free);
1239 if (unlikely(err != NET_XMIT_SUCCESS)) {
1240 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
1241 if (net_xmit_drop_count(err)) {
1242 cl->qstats.drops++;
1243 qdisc_qstats_drop(sch);
1244 }
1245 return err;
1246 }
1247
1248 bstats_update(&cl->bstats, skb);
1249 qdisc_qstats_backlog_inc(sch, skb);
1250 ++sch->q.qlen;
1251
1252 agg = cl->agg;
1253
1254 if (cl->qdisc->q.qlen != 1) {
1255 if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
1256 list_first_entry(&agg->active, struct qfq_class, alist)
1257 == cl && cl->deficit < qdisc_pkt_len(skb))
1258 list_move_tail(&cl->alist, &agg->active);
1259
1260 return err;
1261 }
1262
1263
1264 cl->deficit = agg->lmax;
1265 list_add_tail(&cl->alist, &agg->active);
1266
1267 if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
1268 q->in_serv_agg == agg)
1269 return err;
1270
1271 qfq_activate_agg(q, agg, enqueue);
1272
1273 return err;
1274}
1275
1276
1277
1278
1279static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1280{
1281 struct qfq_group *grp = agg->grp;
1282 u64 roundedS;
1283 int s;
1284
1285 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296 if (grp->full_slots) {
1297 if (!qfq_gt(grp->S, agg->S))
1298 goto skip_update;
1299
1300
1301 qfq_slot_rotate(grp, roundedS);
1302
1303 __clear_bit(grp->index, &q->bitmaps[IR]);
1304 __clear_bit(grp->index, &q->bitmaps[IB]);
1305 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
1306 q->in_serv_agg == NULL)
1307 q->V = roundedS;
1308
1309 grp->S = roundedS;
1310 grp->F = roundedS + (2ULL << grp->slot_shift);
1311 s = qfq_calc_state(q, grp);
1312 __set_bit(grp->index, &q->bitmaps[s]);
1313
1314 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
1315 s, q->bitmaps[s],
1316 (unsigned long long) agg->S,
1317 (unsigned long long) agg->F,
1318 (unsigned long long) q->V);
1319
1320skip_update:
1321 qfq_slot_insert(grp, agg, roundedS);
1322}
1323
1324
1325
1326static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
1327 enum update_reason reason)
1328{
1329 agg->initial_budget = agg->budget = agg->budgetmax;
1330
1331 qfq_update_agg_ts(q, agg, reason);
1332 if (q->in_serv_agg == NULL) {
1333 q->in_serv_agg = agg;
1334
1335 q->oldV = q->V = agg->S;
1336 } else if (agg != q->in_serv_agg)
1337 qfq_schedule_agg(q, agg);
1338}
1339
1340static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
1341 struct qfq_aggregate *agg)
1342{
1343 unsigned int i, offset;
1344 u64 roundedS;
1345
1346 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1347 offset = (roundedS - grp->S) >> grp->slot_shift;
1348
1349 i = (grp->front + offset) % QFQ_MAX_SLOTS;
1350
1351 hlist_del(&agg->next);
1352 if (hlist_empty(&grp->slots[i]))
1353 __clear_bit(offset, &grp->full_slots);
1354}
1355
1356
1357
1358
1359
1360
1361
1362
1363static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1364{
1365 struct qfq_group *grp = agg->grp;
1366 unsigned long mask;
1367 u64 roundedS;
1368 int s;
1369
1370 if (agg == q->in_serv_agg) {
1371 charge_actual_service(agg);
1372 q->in_serv_agg = qfq_choose_next_agg(q);
1373 return;
1374 }
1375
1376 agg->F = agg->S;
1377 qfq_slot_remove(q, grp, agg);
1378
1379 if (!grp->full_slots) {
1380 __clear_bit(grp->index, &q->bitmaps[IR]);
1381 __clear_bit(grp->index, &q->bitmaps[EB]);
1382 __clear_bit(grp->index, &q->bitmaps[IB]);
1383
1384 if (test_bit(grp->index, &q->bitmaps[ER]) &&
1385 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
1386 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
1387 if (mask)
1388 mask = ~((1UL << __fls(mask)) - 1);
1389 else
1390 mask = ~0UL;
1391 qfq_move_groups(q, mask, EB, ER);
1392 qfq_move_groups(q, mask, IB, IR);
1393 }
1394 __clear_bit(grp->index, &q->bitmaps[ER]);
1395 } else if (hlist_empty(&grp->slots[grp->front])) {
1396 agg = qfq_slot_scan(grp);
1397 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1398 if (grp->S != roundedS) {
1399 __clear_bit(grp->index, &q->bitmaps[ER]);
1400 __clear_bit(grp->index, &q->bitmaps[IR]);
1401 __clear_bit(grp->index, &q->bitmaps[EB]);
1402 __clear_bit(grp->index, &q->bitmaps[IB]);
1403 grp->S = roundedS;
1404 grp->F = roundedS + (2ULL << grp->slot_shift);
1405 s = qfq_calc_state(q, grp);
1406 __set_bit(grp->index, &q->bitmaps[s]);
1407 }
1408 }
1409}
1410
1411static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1412{
1413 struct qfq_sched *q = qdisc_priv(sch);
1414 struct qfq_class *cl = (struct qfq_class *)arg;
1415
1416 qfq_deactivate_class(q, cl);
1417}
1418
1419static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
1420 struct netlink_ext_ack *extack)
1421{
1422 struct qfq_sched *q = qdisc_priv(sch);
1423 struct qfq_group *grp;
1424 int i, j, err;
1425 u32 max_cl_shift, maxbudg_shift, max_classes;
1426
1427 err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1428 if (err)
1429 return err;
1430
1431 err = qdisc_class_hash_init(&q->clhash);
1432 if (err < 0)
1433 return err;
1434
1435 if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
1436 max_classes = QFQ_MAX_AGG_CLASSES;
1437 else
1438 max_classes = qdisc_dev(sch)->tx_queue_len + 1;
1439
1440 max_cl_shift = __fls(max_classes);
1441 q->max_agg_classes = 1<<max_cl_shift;
1442
1443
1444 maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
1445 q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
1446
1447 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1448 grp = &q->groups[i];
1449 grp->index = i;
1450 grp->slot_shift = q->min_slot_shift + i;
1451 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1452 INIT_HLIST_HEAD(&grp->slots[j]);
1453 }
1454
1455 INIT_HLIST_HEAD(&q->nonfull_aggs);
1456
1457 return 0;
1458}
1459
1460static void qfq_reset_qdisc(struct Qdisc *sch)
1461{
1462 struct qfq_sched *q = qdisc_priv(sch);
1463 struct qfq_class *cl;
1464 unsigned int i;
1465
1466 for (i = 0; i < q->clhash.hashsize; i++) {
1467 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1468 if (cl->qdisc->q.qlen > 0)
1469 qfq_deactivate_class(q, cl);
1470
1471 qdisc_reset(cl->qdisc);
1472 }
1473 }
1474 sch->qstats.backlog = 0;
1475 sch->q.qlen = 0;
1476}
1477
1478static void qfq_destroy_qdisc(struct Qdisc *sch)
1479{
1480 struct qfq_sched *q = qdisc_priv(sch);
1481 struct qfq_class *cl;
1482 struct hlist_node *next;
1483 unsigned int i;
1484
1485 tcf_block_put(q->block);
1486
1487 for (i = 0; i < q->clhash.hashsize; i++) {
1488 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1489 common.hnode) {
1490 qfq_destroy_class(sch, cl);
1491 }
1492 }
1493 qdisc_class_hash_destroy(&q->clhash);
1494}
1495
1496static const struct Qdisc_class_ops qfq_class_ops = {
1497 .change = qfq_change_class,
1498 .delete = qfq_delete_class,
1499 .find = qfq_search_class,
1500 .tcf_block = qfq_tcf_block,
1501 .bind_tcf = qfq_bind_tcf,
1502 .unbind_tcf = qfq_unbind_tcf,
1503 .graft = qfq_graft_class,
1504 .leaf = qfq_class_leaf,
1505 .qlen_notify = qfq_qlen_notify,
1506 .dump = qfq_dump_class,
1507 .dump_stats = qfq_dump_class_stats,
1508 .walk = qfq_walk,
1509};
1510
1511static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1512 .cl_ops = &qfq_class_ops,
1513 .id = "qfq",
1514 .priv_size = sizeof(struct qfq_sched),
1515 .enqueue = qfq_enqueue,
1516 .dequeue = qfq_dequeue,
1517 .peek = qdisc_peek_dequeued,
1518 .init = qfq_init_qdisc,
1519 .reset = qfq_reset_qdisc,
1520 .destroy = qfq_destroy_qdisc,
1521 .owner = THIS_MODULE,
1522};
1523
1524static int __init qfq_init(void)
1525{
1526 return register_qdisc(&qfq_qdisc_ops);
1527}
1528
1529static void __exit qfq_exit(void)
1530{
1531 unregister_qdisc(&qfq_qdisc_ops);
1532}
1533
1534module_init(qfq_init);
1535module_exit(qfq_exit);
1536MODULE_LICENSE("GPL");
1537