1
2
3
4
5
6
7
8
9
10
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/bitops.h>
15#include <linux/errno.h>
16#include <linux/netdevice.h>
17#include <linux/pkt_sched.h>
18#include <net/sch_generic.h>
19#include <net/pkt_sched.h>
20#include <net/pkt_cls.h>
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97#define QFQ_MAX_SLOTS 32
98
99
100
101
102
103
104
105
106
107
108#define QFQ_MAX_INDEX 24
109#define QFQ_MAX_WSHIFT 10
110
111#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
112#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
113
114#define FRAC_BITS 30
115#define ONE_FP (1UL << FRAC_BITS)
116
117#define QFQ_MTU_SHIFT 16
118#define QFQ_MIN_LMAX 512
119
120#define QFQ_MAX_AGG_CLASSES 8
121
122
123
124
125
126enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
127
128struct qfq_group;
129
130struct qfq_aggregate;
131
132struct qfq_class {
133 struct Qdisc_class_common common;
134
135 unsigned int filter_cnt;
136
137 struct gnet_stats_basic_packed bstats;
138 struct gnet_stats_queue qstats;
139 struct net_rate_estimator __rcu *rate_est;
140 struct Qdisc *qdisc;
141 struct list_head alist;
142 struct qfq_aggregate *agg;
143 int deficit;
144};
145
146struct qfq_aggregate {
147 struct hlist_node next;
148 u64 S, F;
149
150
151
152
153
154 struct qfq_group *grp;
155
156
157 u32 class_weight;
158
159 int lmax;
160
161 u32 inv_w;
162 u32 budgetmax;
163 u32 initial_budget, budget;
164
165 int num_classes;
166 struct list_head active;
167
168 struct hlist_node nonfull_next;
169};
170
171struct qfq_group {
172 u64 S, F;
173 unsigned int slot_shift;
174 unsigned int index;
175 unsigned int front;
176 unsigned long full_slots;
177
178
179 struct hlist_head slots[QFQ_MAX_SLOTS];
180};
181
182struct qfq_sched {
183 struct tcf_proto __rcu *filter_list;
184 struct tcf_block *block;
185 struct Qdisc_class_hash clhash;
186
187 u64 oldV, V;
188 struct qfq_aggregate *in_serv_agg;
189 u32 wsum;
190 u32 iwsum;
191
192 unsigned long bitmaps[QFQ_MAX_STATE];
193 struct qfq_group groups[QFQ_MAX_INDEX + 1];
194 u32 min_slot_shift;
195
196 u32 max_agg_classes;
197 struct hlist_head nonfull_aggs;
198};
199
200
201
202
203
204
205
206
207enum update_reason {enqueue, requeue};
208
209static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
210{
211 struct qfq_sched *q = qdisc_priv(sch);
212 struct Qdisc_class_common *clc;
213
214 clc = qdisc_class_find(&q->clhash, classid);
215 if (clc == NULL)
216 return NULL;
217 return container_of(clc, struct qfq_class, common);
218}
219
220static void qfq_purge_queue(struct qfq_class *cl)
221{
222 unsigned int len = cl->qdisc->q.qlen;
223 unsigned int backlog = cl->qdisc->qstats.backlog;
224
225 qdisc_reset(cl->qdisc);
226 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
227}
228
229static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
230 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
231 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
232};
233
234
235
236
237
238
239static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
240{
241 u64 slot_size = (u64)maxlen * inv_w;
242 unsigned long size_map;
243 int index = 0;
244
245 size_map = slot_size >> min_slot_shift;
246 if (!size_map)
247 goto out;
248
249 index = __fls(size_map) + 1;
250 index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
251
252 if (index < 0)
253 index = 0;
254out:
255 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
256 (unsigned long) ONE_FP/inv_w, maxlen, index);
257
258 return index;
259}
260
261static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
262static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
263 enum update_reason);
264
265static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
266 u32 lmax, u32 weight)
267{
268 INIT_LIST_HEAD(&agg->active);
269 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
270
271 agg->lmax = lmax;
272 agg->class_weight = weight;
273}
274
275static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
276 u32 lmax, u32 weight)
277{
278 struct qfq_aggregate *agg;
279
280 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
281 if (agg->lmax == lmax && agg->class_weight == weight)
282 return agg;
283
284 return NULL;
285}
286
287
288
289static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
290 int new_num_classes)
291{
292 u32 new_agg_weight;
293
294 if (new_num_classes == q->max_agg_classes)
295 hlist_del_init(&agg->nonfull_next);
296
297 if (agg->num_classes > new_num_classes &&
298 new_num_classes == q->max_agg_classes - 1)
299 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
300
301
302
303
304
305 agg->budgetmax = new_num_classes * agg->lmax;
306 new_agg_weight = agg->class_weight * new_num_classes;
307 agg->inv_w = ONE_FP/new_agg_weight;
308
309 if (agg->grp == NULL) {
310 int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
311 q->min_slot_shift);
312 agg->grp = &q->groups[i];
313 }
314
315 q->wsum +=
316 (int) agg->class_weight * (new_num_classes - agg->num_classes);
317 q->iwsum = ONE_FP / q->wsum;
318
319 agg->num_classes = new_num_classes;
320}
321
322
323static void qfq_add_to_agg(struct qfq_sched *q,
324 struct qfq_aggregate *agg,
325 struct qfq_class *cl)
326{
327 cl->agg = agg;
328
329 qfq_update_agg(q, agg, agg->num_classes+1);
330 if (cl->qdisc->q.qlen > 0) {
331 list_add_tail(&cl->alist, &agg->active);
332 if (list_first_entry(&agg->active, struct qfq_class, alist) ==
333 cl && q->in_serv_agg != agg)
334 qfq_activate_agg(q, agg, enqueue);
335 }
336}
337
338static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
339
340static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
341{
342 hlist_del_init(&agg->nonfull_next);
343 q->wsum -= agg->class_weight;
344 if (q->wsum != 0)
345 q->iwsum = ONE_FP / q->wsum;
346
347 if (q->in_serv_agg == agg)
348 q->in_serv_agg = qfq_choose_next_agg(q);
349 kfree(agg);
350}
351
352
353static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
354{
355 struct qfq_aggregate *agg = cl->agg;
356
357
358 list_del(&cl->alist);
359 if (list_empty(&agg->active))
360 qfq_deactivate_agg(q, agg);
361}
362
363
364static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
365{
366 struct qfq_aggregate *agg = cl->agg;
367
368 cl->agg = NULL;
369 if (agg->num_classes == 1) {
370 qfq_destroy_agg(q, agg);
371 return;
372 }
373 qfq_update_agg(q, agg, agg->num_classes-1);
374}
375
376
377static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
378{
379 if (cl->qdisc->q.qlen > 0)
380 qfq_deactivate_class(q, cl);
381
382 qfq_rm_from_agg(q, cl);
383}
384
385
386static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
387 u32 lmax)
388{
389 struct qfq_sched *q = qdisc_priv(sch);
390 struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
391
392 if (new_agg == NULL) {
393 new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
394 if (new_agg == NULL)
395 return -ENOBUFS;
396 qfq_init_agg(q, new_agg, lmax, weight);
397 }
398 qfq_deact_rm_from_agg(q, cl);
399 qfq_add_to_agg(q, new_agg, cl);
400
401 return 0;
402}
403
404static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
405 struct nlattr **tca, unsigned long *arg)
406{
407 struct qfq_sched *q = qdisc_priv(sch);
408 struct qfq_class *cl = (struct qfq_class *)*arg;
409 bool existing = false;
410 struct nlattr *tb[TCA_QFQ_MAX + 1];
411 struct qfq_aggregate *new_agg = NULL;
412 u32 weight, lmax, inv_w;
413 int err;
414 int delta_w;
415
416 if (tca[TCA_OPTIONS] == NULL) {
417 pr_notice("qfq: no options\n");
418 return -EINVAL;
419 }
420
421 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
422 if (err < 0)
423 return err;
424
425 if (tb[TCA_QFQ_WEIGHT]) {
426 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
427 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
428 pr_notice("qfq: invalid weight %u\n", weight);
429 return -EINVAL;
430 }
431 } else
432 weight = 1;
433
434 if (tb[TCA_QFQ_LMAX]) {
435 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
436 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
437 pr_notice("qfq: invalid max length %u\n", lmax);
438 return -EINVAL;
439 }
440 } else
441 lmax = psched_mtu(qdisc_dev(sch));
442
443 inv_w = ONE_FP / weight;
444 weight = ONE_FP / inv_w;
445
446 if (cl != NULL &&
447 lmax == cl->agg->lmax &&
448 weight == cl->agg->class_weight)
449 return 0;
450
451 delta_w = weight - (cl ? cl->agg->class_weight : 0);
452
453 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
454 pr_notice("qfq: total weight out of range (%d + %u)\n",
455 delta_w, q->wsum);
456 return -EINVAL;
457 }
458
459 if (cl != NULL) {
460 if (tca[TCA_RATE]) {
461 err = gen_replace_estimator(&cl->bstats, NULL,
462 &cl->rate_est,
463 NULL,
464 qdisc_root_sleeping_running(sch),
465 tca[TCA_RATE]);
466 if (err)
467 return err;
468 }
469 existing = true;
470 goto set_change_agg;
471 }
472
473
474 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
475 if (cl == NULL)
476 return -ENOBUFS;
477
478 cl->common.classid = classid;
479 cl->deficit = lmax;
480
481 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
482 &pfifo_qdisc_ops, classid);
483 if (cl->qdisc == NULL)
484 cl->qdisc = &noop_qdisc;
485
486 if (tca[TCA_RATE]) {
487 err = gen_new_estimator(&cl->bstats, NULL,
488 &cl->rate_est,
489 NULL,
490 qdisc_root_sleeping_running(sch),
491 tca[TCA_RATE]);
492 if (err)
493 goto destroy_class;
494 }
495
496 if (cl->qdisc != &noop_qdisc)
497 qdisc_hash_add(cl->qdisc, true);
498 sch_tree_lock(sch);
499 qdisc_class_hash_insert(&q->clhash, &cl->common);
500 sch_tree_unlock(sch);
501
502 qdisc_class_hash_grow(sch, &q->clhash);
503
504set_change_agg:
505 sch_tree_lock(sch);
506 new_agg = qfq_find_agg(q, lmax, weight);
507 if (new_agg == NULL) {
508 sch_tree_unlock(sch);
509 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
510 if (new_agg == NULL) {
511 err = -ENOBUFS;
512 gen_kill_estimator(&cl->rate_est);
513 goto destroy_class;
514 }
515 sch_tree_lock(sch);
516 qfq_init_agg(q, new_agg, lmax, weight);
517 }
518 if (existing)
519 qfq_deact_rm_from_agg(q, cl);
520 qfq_add_to_agg(q, new_agg, cl);
521 sch_tree_unlock(sch);
522
523 *arg = (unsigned long)cl;
524 return 0;
525
526destroy_class:
527 qdisc_destroy(cl->qdisc);
528 kfree(cl);
529 return err;
530}
531
532static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
533{
534 struct qfq_sched *q = qdisc_priv(sch);
535
536 qfq_rm_from_agg(q, cl);
537 gen_kill_estimator(&cl->rate_est);
538 qdisc_destroy(cl->qdisc);
539 kfree(cl);
540}
541
542static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
543{
544 struct qfq_sched *q = qdisc_priv(sch);
545 struct qfq_class *cl = (struct qfq_class *)arg;
546
547 if (cl->filter_cnt > 0)
548 return -EBUSY;
549
550 sch_tree_lock(sch);
551
552 qfq_purge_queue(cl);
553 qdisc_class_hash_remove(&q->clhash, &cl->common);
554
555 sch_tree_unlock(sch);
556
557 qfq_destroy_class(sch, cl);
558 return 0;
559}
560
561static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
562{
563 return (unsigned long)qfq_find_class(sch, classid);
564}
565
566static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
567{
568 struct qfq_sched *q = qdisc_priv(sch);
569
570 if (cl)
571 return NULL;
572
573 return q->block;
574}
575
576static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
577 u32 classid)
578{
579 struct qfq_class *cl = qfq_find_class(sch, classid);
580
581 if (cl != NULL)
582 cl->filter_cnt++;
583
584 return (unsigned long)cl;
585}
586
587static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
588{
589 struct qfq_class *cl = (struct qfq_class *)arg;
590
591 cl->filter_cnt--;
592}
593
594static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
595 struct Qdisc *new, struct Qdisc **old)
596{
597 struct qfq_class *cl = (struct qfq_class *)arg;
598
599 if (new == NULL) {
600 new = qdisc_create_dflt(sch->dev_queue,
601 &pfifo_qdisc_ops, cl->common.classid);
602 if (new == NULL)
603 new = &noop_qdisc;
604 }
605
606 *old = qdisc_replace(sch, new, &cl->qdisc);
607 return 0;
608}
609
610static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
611{
612 struct qfq_class *cl = (struct qfq_class *)arg;
613
614 return cl->qdisc;
615}
616
617static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
618 struct sk_buff *skb, struct tcmsg *tcm)
619{
620 struct qfq_class *cl = (struct qfq_class *)arg;
621 struct nlattr *nest;
622
623 tcm->tcm_parent = TC_H_ROOT;
624 tcm->tcm_handle = cl->common.classid;
625 tcm->tcm_info = cl->qdisc->handle;
626
627 nest = nla_nest_start(skb, TCA_OPTIONS);
628 if (nest == NULL)
629 goto nla_put_failure;
630 if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
631 nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
632 goto nla_put_failure;
633 return nla_nest_end(skb, nest);
634
635nla_put_failure:
636 nla_nest_cancel(skb, nest);
637 return -EMSGSIZE;
638}
639
640static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
641 struct gnet_dump *d)
642{
643 struct qfq_class *cl = (struct qfq_class *)arg;
644 struct tc_qfq_stats xstats;
645
646 memset(&xstats, 0, sizeof(xstats));
647
648 xstats.weight = cl->agg->class_weight;
649 xstats.lmax = cl->agg->lmax;
650
651 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
652 d, NULL, &cl->bstats) < 0 ||
653 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
654 gnet_stats_copy_queue(d, NULL,
655 &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
656 return -1;
657
658 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
659}
660
661static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
662{
663 struct qfq_sched *q = qdisc_priv(sch);
664 struct qfq_class *cl;
665 unsigned int i;
666
667 if (arg->stop)
668 return;
669
670 for (i = 0; i < q->clhash.hashsize; i++) {
671 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
672 if (arg->count < arg->skip) {
673 arg->count++;
674 continue;
675 }
676 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
677 arg->stop = 1;
678 return;
679 }
680 arg->count++;
681 }
682 }
683}
684
685static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
686 int *qerr)
687{
688 struct qfq_sched *q = qdisc_priv(sch);
689 struct qfq_class *cl;
690 struct tcf_result res;
691 struct tcf_proto *fl;
692 int result;
693
694 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
695 pr_debug("qfq_classify: found %d\n", skb->priority);
696 cl = qfq_find_class(sch, skb->priority);
697 if (cl != NULL)
698 return cl;
699 }
700
701 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
702 fl = rcu_dereference_bh(q->filter_list);
703 result = tcf_classify(skb, fl, &res, false);
704 if (result >= 0) {
705#ifdef CONFIG_NET_CLS_ACT
706 switch (result) {
707 case TC_ACT_QUEUED:
708 case TC_ACT_STOLEN:
709 case TC_ACT_TRAP:
710 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
711 case TC_ACT_SHOT:
712 return NULL;
713 }
714#endif
715 cl = (struct qfq_class *)res.class;
716 if (cl == NULL)
717 cl = qfq_find_class(sch, res.classid);
718 return cl;
719 }
720
721 return NULL;
722}
723
724
725static inline int qfq_gt(u64 a, u64 b)
726{
727 return (s64)(a - b) > 0;
728}
729
730
731static inline u64 qfq_round_down(u64 ts, unsigned int shift)
732{
733 return ts & ~((1ULL << shift) - 1);
734}
735
736
737static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
738 unsigned long bitmap)
739{
740 int index = __ffs(bitmap);
741 return &q->groups[index];
742}
743
744static inline unsigned long mask_from(unsigned long bitmap, int from)
745{
746 return bitmap & ~((1UL << from) - 1);
747}
748
749
750
751
752
753
754static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
755{
756
757 unsigned int state = qfq_gt(grp->S, q->V);
758 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
759 struct qfq_group *next;
760
761 if (mask) {
762 next = qfq_ffs(q, mask);
763 if (qfq_gt(grp->F, next->F))
764 state |= EB;
765 }
766
767 return state;
768}
769
770
771
772
773
774
775
776
777static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
778 int src, int dst)
779{
780 q->bitmaps[dst] |= q->bitmaps[src] & mask;
781 q->bitmaps[src] &= ~mask;
782}
783
784static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
785{
786 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
787 struct qfq_group *next;
788
789 if (mask) {
790 next = qfq_ffs(q, mask);
791 if (!qfq_gt(next->F, old_F))
792 return;
793 }
794
795 mask = (1UL << index) - 1;
796 qfq_move_groups(q, mask, EB, ER);
797 qfq_move_groups(q, mask, IB, IR);
798}
799
800
801
802
803
804
805
806
807
808
809
810static void qfq_make_eligible(struct qfq_sched *q)
811{
812 unsigned long vslot = q->V >> q->min_slot_shift;
813 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
814
815 if (vslot != old_vslot) {
816 unsigned long mask;
817 int last_flip_pos = fls(vslot ^ old_vslot);
818
819 if (last_flip_pos > 31)
820 mask = ~0UL;
821 else
822 mask = (1UL << last_flip_pos) - 1;
823
824 qfq_move_groups(q, mask, IR, ER);
825 qfq_move_groups(q, mask, IB, EB);
826 }
827}
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
885 u64 roundedS)
886{
887 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
888 unsigned int i;
889
890 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
891 u64 deltaS = roundedS - grp->S -
892 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
893 agg->S -= deltaS;
894 agg->F -= deltaS;
895 slot = QFQ_MAX_SLOTS - 2;
896 }
897
898 i = (grp->front + slot) % QFQ_MAX_SLOTS;
899
900 hlist_add_head(&agg->next, &grp->slots[i]);
901 __set_bit(slot, &grp->full_slots);
902}
903
904
905static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
906{
907 return hlist_entry(grp->slots[grp->front].first,
908 struct qfq_aggregate, next);
909}
910
911
912
913
914static void qfq_front_slot_remove(struct qfq_group *grp)
915{
916 struct qfq_aggregate *agg = qfq_slot_head(grp);
917
918 BUG_ON(!agg);
919 hlist_del(&agg->next);
920 if (hlist_empty(&grp->slots[grp->front]))
921 __clear_bit(0, &grp->full_slots);
922}
923
924
925
926
927
928
929static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
930{
931 unsigned int i;
932
933 pr_debug("qfq slot_scan: grp %u full %#lx\n",
934 grp->index, grp->full_slots);
935
936 if (grp->full_slots == 0)
937 return NULL;
938
939 i = __ffs(grp->full_slots);
940 if (i > 0) {
941 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
942 grp->full_slots >>= i;
943 }
944
945 return qfq_slot_head(grp);
946}
947
948
949
950
951
952
953
954
955
956
957static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
958{
959 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
960
961 grp->full_slots <<= i;
962 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
963}
964
965static void qfq_update_eligible(struct qfq_sched *q)
966{
967 struct qfq_group *grp;
968 unsigned long ineligible;
969
970 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
971 if (ineligible) {
972 if (!q->bitmaps[ER]) {
973 grp = qfq_ffs(q, ineligible);
974 if (qfq_gt(grp->S, q->V))
975 q->V = grp->S;
976 }
977 qfq_make_eligible(q);
978 }
979}
980
981
982static void agg_dequeue(struct qfq_aggregate *agg,
983 struct qfq_class *cl, unsigned int len)
984{
985 qdisc_dequeue_peeked(cl->qdisc);
986
987 cl->deficit -= (int) len;
988
989 if (cl->qdisc->q.qlen == 0)
990 list_del(&cl->alist);
991 else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
992 cl->deficit += agg->lmax;
993 list_move_tail(&cl->alist, &agg->active);
994 }
995}
996
997static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
998 struct qfq_class **cl,
999 unsigned int *len)
1000{
1001 struct sk_buff *skb;
1002
1003 *cl = list_first_entry(&agg->active, struct qfq_class, alist);
1004 skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
1005 if (skb == NULL)
1006 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
1007 else
1008 *len = qdisc_pkt_len(skb);
1009
1010 return skb;
1011}
1012
1013
1014static inline void charge_actual_service(struct qfq_aggregate *agg)
1015{
1016
1017
1018
1019
1020
1021 u32 service_received = min(agg->budgetmax,
1022 agg->initial_budget - agg->budget);
1023
1024 agg->F = agg->S + (u64)service_received * agg->inv_w;
1025}
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1040{
1041 unsigned long mask;
1042 u64 limit, roundedF;
1043 int slot_shift = agg->grp->slot_shift;
1044
1045 roundedF = qfq_round_down(agg->F, slot_shift);
1046 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1047
1048 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1049
1050 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1051 if (mask) {
1052 struct qfq_group *next = qfq_ffs(q, mask);
1053 if (qfq_gt(roundedF, next->F)) {
1054 if (qfq_gt(limit, next->F))
1055 agg->S = next->F;
1056 else
1057 agg->S = limit;
1058 return;
1059 }
1060 }
1061 agg->S = q->V;
1062 } else
1063 agg->S = agg->F;
1064}
1065
1066
1067
1068
1069
1070
1071static inline void
1072qfq_update_agg_ts(struct qfq_sched *q,
1073 struct qfq_aggregate *agg, enum update_reason reason)
1074{
1075 if (reason != requeue)
1076 qfq_update_start(q, agg);
1077 else
1078 agg->S = agg->F;
1079
1080 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1081}
1082
1083static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1084
1085static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1086{
1087 struct qfq_sched *q = qdisc_priv(sch);
1088 struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
1089 struct qfq_class *cl;
1090 struct sk_buff *skb = NULL;
1091
1092 unsigned int len = 0;
1093
1094 if (in_serv_agg == NULL)
1095 return NULL;
1096
1097 if (!list_empty(&in_serv_agg->active))
1098 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1099
1100
1101
1102
1103
1104
1105 if (len == 0 || in_serv_agg->budget < len) {
1106 charge_actual_service(in_serv_agg);
1107
1108
1109 in_serv_agg->initial_budget = in_serv_agg->budget =
1110 in_serv_agg->budgetmax;
1111
1112 if (!list_empty(&in_serv_agg->active)) {
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123 qfq_update_agg_ts(q, in_serv_agg, requeue);
1124 qfq_schedule_agg(q, in_serv_agg);
1125 } else if (sch->q.qlen == 0) {
1126 q->in_serv_agg = NULL;
1127 return NULL;
1128 }
1129
1130
1131
1132
1133
1134 in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
1135 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1136 }
1137 if (!skb)
1138 return NULL;
1139
1140 qdisc_qstats_backlog_dec(sch, skb);
1141 sch->q.qlen--;
1142 qdisc_bstats_update(sch, skb);
1143
1144 agg_dequeue(in_serv_agg, cl, len);
1145
1146
1147
1148
1149 if (unlikely(in_serv_agg->budget < len))
1150 in_serv_agg->budget = 0;
1151 else
1152 in_serv_agg->budget -= len;
1153
1154 q->V += (u64)len * q->iwsum;
1155 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1156 len, (unsigned long long) in_serv_agg->F,
1157 (unsigned long long) q->V);
1158
1159 return skb;
1160}
1161
1162static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1163{
1164 struct qfq_group *grp;
1165 struct qfq_aggregate *agg, *new_front_agg;
1166 u64 old_F;
1167
1168 qfq_update_eligible(q);
1169 q->oldV = q->V;
1170
1171 if (!q->bitmaps[ER])
1172 return NULL;
1173
1174 grp = qfq_ffs(q, q->bitmaps[ER]);
1175 old_F = grp->F;
1176
1177 agg = qfq_slot_head(grp);
1178
1179
1180 qfq_front_slot_remove(grp);
1181
1182 new_front_agg = qfq_slot_scan(grp);
1183
1184 if (new_front_agg == NULL)
1185 __clear_bit(grp->index, &q->bitmaps[ER]);
1186 else {
1187 u64 roundedS = qfq_round_down(new_front_agg->S,
1188 grp->slot_shift);
1189 unsigned int s;
1190
1191 if (grp->S == roundedS)
1192 return agg;
1193 grp->S = roundedS;
1194 grp->F = roundedS + (2ULL << grp->slot_shift);
1195 __clear_bit(grp->index, &q->bitmaps[ER]);
1196 s = qfq_calc_state(q, grp);
1197 __set_bit(grp->index, &q->bitmaps[s]);
1198 }
1199
1200 qfq_unblock_groups(q, grp->index, old_F);
1201
1202 return agg;
1203}
1204
1205static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1206 struct sk_buff **to_free)
1207{
1208 struct qfq_sched *q = qdisc_priv(sch);
1209 struct qfq_class *cl;
1210 struct qfq_aggregate *agg;
1211 int err = 0;
1212
1213 cl = qfq_classify(skb, sch, &err);
1214 if (cl == NULL) {
1215 if (err & __NET_XMIT_BYPASS)
1216 qdisc_qstats_drop(sch);
1217 __qdisc_drop(skb, to_free);
1218 return err;
1219 }
1220 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
1221
1222 if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) {
1223 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
1224 cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid);
1225 err = qfq_change_agg(sch, cl, cl->agg->class_weight,
1226 qdisc_pkt_len(skb));
1227 if (err) {
1228 cl->qstats.drops++;
1229 return qdisc_drop(skb, sch, to_free);
1230 }
1231 }
1232
1233 err = qdisc_enqueue(skb, cl->qdisc, to_free);
1234 if (unlikely(err != NET_XMIT_SUCCESS)) {
1235 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
1236 if (net_xmit_drop_count(err)) {
1237 cl->qstats.drops++;
1238 qdisc_qstats_drop(sch);
1239 }
1240 return err;
1241 }
1242
1243 bstats_update(&cl->bstats, skb);
1244 qdisc_qstats_backlog_inc(sch, skb);
1245 ++sch->q.qlen;
1246
1247 agg = cl->agg;
1248
1249 if (cl->qdisc->q.qlen != 1) {
1250 if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
1251 list_first_entry(&agg->active, struct qfq_class, alist)
1252 == cl && cl->deficit < qdisc_pkt_len(skb))
1253 list_move_tail(&cl->alist, &agg->active);
1254
1255 return err;
1256 }
1257
1258
1259 cl->deficit = agg->lmax;
1260 list_add_tail(&cl->alist, &agg->active);
1261
1262 if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
1263 q->in_serv_agg == agg)
1264 return err;
1265
1266 qfq_activate_agg(q, agg, enqueue);
1267
1268 return err;
1269}
1270
1271
1272
1273
1274static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1275{
1276 struct qfq_group *grp = agg->grp;
1277 u64 roundedS;
1278 int s;
1279
1280 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291 if (grp->full_slots) {
1292 if (!qfq_gt(grp->S, agg->S))
1293 goto skip_update;
1294
1295
1296 qfq_slot_rotate(grp, roundedS);
1297
1298 __clear_bit(grp->index, &q->bitmaps[IR]);
1299 __clear_bit(grp->index, &q->bitmaps[IB]);
1300 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
1301 q->in_serv_agg == NULL)
1302 q->V = roundedS;
1303
1304 grp->S = roundedS;
1305 grp->F = roundedS + (2ULL << grp->slot_shift);
1306 s = qfq_calc_state(q, grp);
1307 __set_bit(grp->index, &q->bitmaps[s]);
1308
1309 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
1310 s, q->bitmaps[s],
1311 (unsigned long long) agg->S,
1312 (unsigned long long) agg->F,
1313 (unsigned long long) q->V);
1314
1315skip_update:
1316 qfq_slot_insert(grp, agg, roundedS);
1317}
1318
1319
1320
1321static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
1322 enum update_reason reason)
1323{
1324 agg->initial_budget = agg->budget = agg->budgetmax;
1325
1326 qfq_update_agg_ts(q, agg, reason);
1327 if (q->in_serv_agg == NULL) {
1328 q->in_serv_agg = agg;
1329
1330 q->oldV = q->V = agg->S;
1331 } else if (agg != q->in_serv_agg)
1332 qfq_schedule_agg(q, agg);
1333}
1334
1335static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
1336 struct qfq_aggregate *agg)
1337{
1338 unsigned int i, offset;
1339 u64 roundedS;
1340
1341 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1342 offset = (roundedS - grp->S) >> grp->slot_shift;
1343
1344 i = (grp->front + offset) % QFQ_MAX_SLOTS;
1345
1346 hlist_del(&agg->next);
1347 if (hlist_empty(&grp->slots[i]))
1348 __clear_bit(offset, &grp->full_slots);
1349}
1350
1351
1352
1353
1354
1355
1356
1357
1358static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1359{
1360 struct qfq_group *grp = agg->grp;
1361 unsigned long mask;
1362 u64 roundedS;
1363 int s;
1364
1365 if (agg == q->in_serv_agg) {
1366 charge_actual_service(agg);
1367 q->in_serv_agg = qfq_choose_next_agg(q);
1368 return;
1369 }
1370
1371 agg->F = agg->S;
1372 qfq_slot_remove(q, grp, agg);
1373
1374 if (!grp->full_slots) {
1375 __clear_bit(grp->index, &q->bitmaps[IR]);
1376 __clear_bit(grp->index, &q->bitmaps[EB]);
1377 __clear_bit(grp->index, &q->bitmaps[IB]);
1378
1379 if (test_bit(grp->index, &q->bitmaps[ER]) &&
1380 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
1381 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
1382 if (mask)
1383 mask = ~((1UL << __fls(mask)) - 1);
1384 else
1385 mask = ~0UL;
1386 qfq_move_groups(q, mask, EB, ER);
1387 qfq_move_groups(q, mask, IB, IR);
1388 }
1389 __clear_bit(grp->index, &q->bitmaps[ER]);
1390 } else if (hlist_empty(&grp->slots[grp->front])) {
1391 agg = qfq_slot_scan(grp);
1392 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1393 if (grp->S != roundedS) {
1394 __clear_bit(grp->index, &q->bitmaps[ER]);
1395 __clear_bit(grp->index, &q->bitmaps[IR]);
1396 __clear_bit(grp->index, &q->bitmaps[EB]);
1397 __clear_bit(grp->index, &q->bitmaps[IB]);
1398 grp->S = roundedS;
1399 grp->F = roundedS + (2ULL << grp->slot_shift);
1400 s = qfq_calc_state(q, grp);
1401 __set_bit(grp->index, &q->bitmaps[s]);
1402 }
1403 }
1404}
1405
1406static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1407{
1408 struct qfq_sched *q = qdisc_priv(sch);
1409 struct qfq_class *cl = (struct qfq_class *)arg;
1410
1411 qfq_deactivate_class(q, cl);
1412}
1413
1414static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1415{
1416 struct qfq_sched *q = qdisc_priv(sch);
1417 struct qfq_group *grp;
1418 int i, j, err;
1419 u32 max_cl_shift, maxbudg_shift, max_classes;
1420
1421 err = tcf_block_get(&q->block, &q->filter_list);
1422 if (err)
1423 return err;
1424
1425 err = qdisc_class_hash_init(&q->clhash);
1426 if (err < 0)
1427 return err;
1428
1429 if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
1430 max_classes = QFQ_MAX_AGG_CLASSES;
1431 else
1432 max_classes = qdisc_dev(sch)->tx_queue_len + 1;
1433
1434 max_cl_shift = __fls(max_classes);
1435 q->max_agg_classes = 1<<max_cl_shift;
1436
1437
1438 maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
1439 q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
1440
1441 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1442 grp = &q->groups[i];
1443 grp->index = i;
1444 grp->slot_shift = q->min_slot_shift + i;
1445 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1446 INIT_HLIST_HEAD(&grp->slots[j]);
1447 }
1448
1449 INIT_HLIST_HEAD(&q->nonfull_aggs);
1450
1451 return 0;
1452}
1453
1454static void qfq_reset_qdisc(struct Qdisc *sch)
1455{
1456 struct qfq_sched *q = qdisc_priv(sch);
1457 struct qfq_class *cl;
1458 unsigned int i;
1459
1460 for (i = 0; i < q->clhash.hashsize; i++) {
1461 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1462 if (cl->qdisc->q.qlen > 0)
1463 qfq_deactivate_class(q, cl);
1464
1465 qdisc_reset(cl->qdisc);
1466 }
1467 }
1468 sch->qstats.backlog = 0;
1469 sch->q.qlen = 0;
1470}
1471
1472static void qfq_destroy_qdisc(struct Qdisc *sch)
1473{
1474 struct qfq_sched *q = qdisc_priv(sch);
1475 struct qfq_class *cl;
1476 struct hlist_node *next;
1477 unsigned int i;
1478
1479 tcf_block_put(q->block);
1480
1481 for (i = 0; i < q->clhash.hashsize; i++) {
1482 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1483 common.hnode) {
1484 qfq_destroy_class(sch, cl);
1485 }
1486 }
1487 qdisc_class_hash_destroy(&q->clhash);
1488}
1489
1490static const struct Qdisc_class_ops qfq_class_ops = {
1491 .change = qfq_change_class,
1492 .delete = qfq_delete_class,
1493 .find = qfq_search_class,
1494 .tcf_block = qfq_tcf_block,
1495 .bind_tcf = qfq_bind_tcf,
1496 .unbind_tcf = qfq_unbind_tcf,
1497 .graft = qfq_graft_class,
1498 .leaf = qfq_class_leaf,
1499 .qlen_notify = qfq_qlen_notify,
1500 .dump = qfq_dump_class,
1501 .dump_stats = qfq_dump_class_stats,
1502 .walk = qfq_walk,
1503};
1504
1505static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1506 .cl_ops = &qfq_class_ops,
1507 .id = "qfq",
1508 .priv_size = sizeof(struct qfq_sched),
1509 .enqueue = qfq_enqueue,
1510 .dequeue = qfq_dequeue,
1511 .peek = qdisc_peek_dequeued,
1512 .init = qfq_init_qdisc,
1513 .reset = qfq_reset_qdisc,
1514 .destroy = qfq_destroy_qdisc,
1515 .owner = THIS_MODULE,
1516};
1517
1518static int __init qfq_init(void)
1519{
1520 return register_qdisc(&qfq_qdisc_ops);
1521}
1522
1523static void __exit qfq_exit(void)
1524{
1525 unregister_qdisc(&qfq_qdisc_ops);
1526}
1527
1528module_init(qfq_init);
1529module_exit(qfq_exit);
1530MODULE_LICENSE("GPL");
1531