1
2
3
4
5
6
7
8
9
10
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/bitops.h>
15#include <linux/errno.h>
16#include <linux/netdevice.h>
17#include <linux/pkt_sched.h>
18#include <net/sch_generic.h>
19#include <net/pkt_sched.h>
20#include <net/pkt_cls.h>
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97#define QFQ_MAX_SLOTS 32
98
99
100
101
102
103
104
105
106
107
108#define QFQ_MAX_INDEX 24
109#define QFQ_MAX_WSHIFT 10
110
111#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
112#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
113
114#define FRAC_BITS 30
115#define ONE_FP (1UL << FRAC_BITS)
116
117#define QFQ_MTU_SHIFT 16
118#define QFQ_MIN_LMAX 512
119
120#define QFQ_MAX_AGG_CLASSES 8
121
122
123
124
125
126enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
127
128struct qfq_group;
129
130struct qfq_aggregate;
131
132struct qfq_class {
133 struct Qdisc_class_common common;
134
135 unsigned int refcnt;
136 unsigned int filter_cnt;
137
138 struct gnet_stats_basic_packed bstats;
139 struct gnet_stats_queue qstats;
140 struct gnet_stats_rate_est64 rate_est;
141 struct Qdisc *qdisc;
142 struct list_head alist;
143 struct qfq_aggregate *agg;
144 int deficit;
145};
146
147struct qfq_aggregate {
148 struct hlist_node next;
149 u64 S, F;
150
151
152
153
154
155 struct qfq_group *grp;
156
157
158 u32 class_weight;
159
160 int lmax;
161
162 u32 inv_w;
163 u32 budgetmax;
164 u32 initial_budget, budget;
165
166 int num_classes;
167 struct list_head active;
168
169 struct hlist_node nonfull_next;
170};
171
172struct qfq_group {
173 u64 S, F;
174 unsigned int slot_shift;
175 unsigned int index;
176 unsigned int front;
177 unsigned long full_slots;
178
179
180 struct hlist_head slots[QFQ_MAX_SLOTS];
181};
182
183struct qfq_sched {
184 struct tcf_proto __rcu *filter_list;
185 struct Qdisc_class_hash clhash;
186
187 u64 oldV, V;
188 struct qfq_aggregate *in_serv_agg;
189 u32 wsum;
190 u32 iwsum;
191
192 unsigned long bitmaps[QFQ_MAX_STATE];
193 struct qfq_group groups[QFQ_MAX_INDEX + 1];
194 u32 min_slot_shift;
195
196 u32 max_agg_classes;
197 struct hlist_head nonfull_aggs;
198};
199
200
201
202
203
204
205
206
207enum update_reason {enqueue, requeue};
208
209static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
210{
211 struct qfq_sched *q = qdisc_priv(sch);
212 struct Qdisc_class_common *clc;
213
214 clc = qdisc_class_find(&q->clhash, classid);
215 if (clc == NULL)
216 return NULL;
217 return container_of(clc, struct qfq_class, common);
218}
219
220static void qfq_purge_queue(struct qfq_class *cl)
221{
222 unsigned int len = cl->qdisc->q.qlen;
223 unsigned int backlog = cl->qdisc->qstats.backlog;
224
225 qdisc_reset(cl->qdisc);
226 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
227}
228
229static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
230 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
231 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
232};
233
234
235
236
237
238
239static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
240{
241 u64 slot_size = (u64)maxlen * inv_w;
242 unsigned long size_map;
243 int index = 0;
244
245 size_map = slot_size >> min_slot_shift;
246 if (!size_map)
247 goto out;
248
249 index = __fls(size_map) + 1;
250 index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
251
252 if (index < 0)
253 index = 0;
254out:
255 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
256 (unsigned long) ONE_FP/inv_w, maxlen, index);
257
258 return index;
259}
260
261static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
262static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
263 enum update_reason);
264
265static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
266 u32 lmax, u32 weight)
267{
268 INIT_LIST_HEAD(&agg->active);
269 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
270
271 agg->lmax = lmax;
272 agg->class_weight = weight;
273}
274
275static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
276 u32 lmax, u32 weight)
277{
278 struct qfq_aggregate *agg;
279
280 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
281 if (agg->lmax == lmax && agg->class_weight == weight)
282 return agg;
283
284 return NULL;
285}
286
287
288
289static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
290 int new_num_classes)
291{
292 u32 new_agg_weight;
293
294 if (new_num_classes == q->max_agg_classes)
295 hlist_del_init(&agg->nonfull_next);
296
297 if (agg->num_classes > new_num_classes &&
298 new_num_classes == q->max_agg_classes - 1)
299 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
300
301
302
303
304
305 agg->budgetmax = new_num_classes * agg->lmax;
306 new_agg_weight = agg->class_weight * new_num_classes;
307 agg->inv_w = ONE_FP/new_agg_weight;
308
309 if (agg->grp == NULL) {
310 int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
311 q->min_slot_shift);
312 agg->grp = &q->groups[i];
313 }
314
315 q->wsum +=
316 (int) agg->class_weight * (new_num_classes - agg->num_classes);
317 q->iwsum = ONE_FP / q->wsum;
318
319 agg->num_classes = new_num_classes;
320}
321
322
323static void qfq_add_to_agg(struct qfq_sched *q,
324 struct qfq_aggregate *agg,
325 struct qfq_class *cl)
326{
327 cl->agg = agg;
328
329 qfq_update_agg(q, agg, agg->num_classes+1);
330 if (cl->qdisc->q.qlen > 0) {
331 list_add_tail(&cl->alist, &agg->active);
332 if (list_first_entry(&agg->active, struct qfq_class, alist) ==
333 cl && q->in_serv_agg != agg)
334 qfq_activate_agg(q, agg, enqueue);
335 }
336}
337
338static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
339
340static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
341{
342 hlist_del_init(&agg->nonfull_next);
343 q->wsum -= agg->class_weight;
344 if (q->wsum != 0)
345 q->iwsum = ONE_FP / q->wsum;
346
347 if (q->in_serv_agg == agg)
348 q->in_serv_agg = qfq_choose_next_agg(q);
349 kfree(agg);
350}
351
352
353static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
354{
355 struct qfq_aggregate *agg = cl->agg;
356
357
358 list_del(&cl->alist);
359 if (list_empty(&agg->active))
360 qfq_deactivate_agg(q, agg);
361}
362
363
364static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
365{
366 struct qfq_aggregate *agg = cl->agg;
367
368 cl->agg = NULL;
369 if (agg->num_classes == 1) {
370 qfq_destroy_agg(q, agg);
371 return;
372 }
373 qfq_update_agg(q, agg, agg->num_classes-1);
374}
375
376
377static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
378{
379 if (cl->qdisc->q.qlen > 0)
380 qfq_deactivate_class(q, cl);
381
382 qfq_rm_from_agg(q, cl);
383}
384
385
386static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
387 u32 lmax)
388{
389 struct qfq_sched *q = qdisc_priv(sch);
390 struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
391
392 if (new_agg == NULL) {
393 new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
394 if (new_agg == NULL)
395 return -ENOBUFS;
396 qfq_init_agg(q, new_agg, lmax, weight);
397 }
398 qfq_deact_rm_from_agg(q, cl);
399 qfq_add_to_agg(q, new_agg, cl);
400
401 return 0;
402}
403
404static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
405 struct nlattr **tca, unsigned long *arg)
406{
407 struct qfq_sched *q = qdisc_priv(sch);
408 struct qfq_class *cl = (struct qfq_class *)*arg;
409 bool existing = false;
410 struct nlattr *tb[TCA_QFQ_MAX + 1];
411 struct qfq_aggregate *new_agg = NULL;
412 u32 weight, lmax, inv_w;
413 int err;
414 int delta_w;
415
416 if (tca[TCA_OPTIONS] == NULL) {
417 pr_notice("qfq: no options\n");
418 return -EINVAL;
419 }
420
421 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
422 if (err < 0)
423 return err;
424
425 if (tb[TCA_QFQ_WEIGHT]) {
426 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
427 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
428 pr_notice("qfq: invalid weight %u\n", weight);
429 return -EINVAL;
430 }
431 } else
432 weight = 1;
433
434 if (tb[TCA_QFQ_LMAX]) {
435 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
436 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
437 pr_notice("qfq: invalid max length %u\n", lmax);
438 return -EINVAL;
439 }
440 } else
441 lmax = psched_mtu(qdisc_dev(sch));
442
443 inv_w = ONE_FP / weight;
444 weight = ONE_FP / inv_w;
445
446 if (cl != NULL &&
447 lmax == cl->agg->lmax &&
448 weight == cl->agg->class_weight)
449 return 0;
450
451 delta_w = weight - (cl ? cl->agg->class_weight : 0);
452
453 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
454 pr_notice("qfq: total weight out of range (%d + %u)\n",
455 delta_w, q->wsum);
456 return -EINVAL;
457 }
458
459 if (cl != NULL) {
460 if (tca[TCA_RATE]) {
461 err = gen_replace_estimator(&cl->bstats, NULL,
462 &cl->rate_est,
463 qdisc_root_sleeping_lock(sch),
464 tca[TCA_RATE]);
465 if (err)
466 return err;
467 }
468 existing = true;
469 goto set_change_agg;
470 }
471
472
473 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
474 if (cl == NULL)
475 return -ENOBUFS;
476
477 cl->refcnt = 1;
478 cl->common.classid = classid;
479 cl->deficit = lmax;
480
481 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
482 &pfifo_qdisc_ops, classid);
483 if (cl->qdisc == NULL)
484 cl->qdisc = &noop_qdisc;
485
486 if (tca[TCA_RATE]) {
487 err = gen_new_estimator(&cl->bstats, NULL,
488 &cl->rate_est,
489 qdisc_root_sleeping_lock(sch),
490 tca[TCA_RATE]);
491 if (err)
492 goto destroy_class;
493 }
494
495 sch_tree_lock(sch);
496 qdisc_class_hash_insert(&q->clhash, &cl->common);
497 sch_tree_unlock(sch);
498
499 qdisc_class_hash_grow(sch, &q->clhash);
500
501set_change_agg:
502 sch_tree_lock(sch);
503 new_agg = qfq_find_agg(q, lmax, weight);
504 if (new_agg == NULL) {
505 sch_tree_unlock(sch);
506 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
507 if (new_agg == NULL) {
508 err = -ENOBUFS;
509 gen_kill_estimator(&cl->bstats, &cl->rate_est);
510 goto destroy_class;
511 }
512 sch_tree_lock(sch);
513 qfq_init_agg(q, new_agg, lmax, weight);
514 }
515 if (existing)
516 qfq_deact_rm_from_agg(q, cl);
517 qfq_add_to_agg(q, new_agg, cl);
518 sch_tree_unlock(sch);
519
520 *arg = (unsigned long)cl;
521 return 0;
522
523destroy_class:
524 qdisc_destroy(cl->qdisc);
525 kfree(cl);
526 return err;
527}
528
529static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
530{
531 struct qfq_sched *q = qdisc_priv(sch);
532
533 qfq_rm_from_agg(q, cl);
534 gen_kill_estimator(&cl->bstats, &cl->rate_est);
535 qdisc_destroy(cl->qdisc);
536 kfree(cl);
537}
538
539static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
540{
541 struct qfq_sched *q = qdisc_priv(sch);
542 struct qfq_class *cl = (struct qfq_class *)arg;
543
544 if (cl->filter_cnt > 0)
545 return -EBUSY;
546
547 sch_tree_lock(sch);
548
549 qfq_purge_queue(cl);
550 qdisc_class_hash_remove(&q->clhash, &cl->common);
551
552 BUG_ON(--cl->refcnt == 0);
553
554
555
556
557
558 sch_tree_unlock(sch);
559 return 0;
560}
561
562static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
563{
564 struct qfq_class *cl = qfq_find_class(sch, classid);
565
566 if (cl != NULL)
567 cl->refcnt++;
568
569 return (unsigned long)cl;
570}
571
572static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
573{
574 struct qfq_class *cl = (struct qfq_class *)arg;
575
576 if (--cl->refcnt == 0)
577 qfq_destroy_class(sch, cl);
578}
579
580static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
581 unsigned long cl)
582{
583 struct qfq_sched *q = qdisc_priv(sch);
584
585 if (cl)
586 return NULL;
587
588 return &q->filter_list;
589}
590
591static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
592 u32 classid)
593{
594 struct qfq_class *cl = qfq_find_class(sch, classid);
595
596 if (cl != NULL)
597 cl->filter_cnt++;
598
599 return (unsigned long)cl;
600}
601
602static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
603{
604 struct qfq_class *cl = (struct qfq_class *)arg;
605
606 cl->filter_cnt--;
607}
608
609static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
610 struct Qdisc *new, struct Qdisc **old)
611{
612 struct qfq_class *cl = (struct qfq_class *)arg;
613
614 if (new == NULL) {
615 new = qdisc_create_dflt(sch->dev_queue,
616 &pfifo_qdisc_ops, cl->common.classid);
617 if (new == NULL)
618 new = &noop_qdisc;
619 }
620
621 *old = qdisc_replace(sch, new, &cl->qdisc);
622 return 0;
623}
624
625static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
626{
627 struct qfq_class *cl = (struct qfq_class *)arg;
628
629 return cl->qdisc;
630}
631
632static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
633 struct sk_buff *skb, struct tcmsg *tcm)
634{
635 struct qfq_class *cl = (struct qfq_class *)arg;
636 struct nlattr *nest;
637
638 tcm->tcm_parent = TC_H_ROOT;
639 tcm->tcm_handle = cl->common.classid;
640 tcm->tcm_info = cl->qdisc->handle;
641
642 nest = nla_nest_start(skb, TCA_OPTIONS);
643 if (nest == NULL)
644 goto nla_put_failure;
645 if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
646 nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
647 goto nla_put_failure;
648 return nla_nest_end(skb, nest);
649
650nla_put_failure:
651 nla_nest_cancel(skb, nest);
652 return -EMSGSIZE;
653}
654
655static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
656 struct gnet_dump *d)
657{
658 struct qfq_class *cl = (struct qfq_class *)arg;
659 struct tc_qfq_stats xstats;
660
661 memset(&xstats, 0, sizeof(xstats));
662
663 xstats.weight = cl->agg->class_weight;
664 xstats.lmax = cl->agg->lmax;
665
666 if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
667 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
668 gnet_stats_copy_queue(d, NULL,
669 &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
670 return -1;
671
672 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
673}
674
675static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
676{
677 struct qfq_sched *q = qdisc_priv(sch);
678 struct qfq_class *cl;
679 unsigned int i;
680
681 if (arg->stop)
682 return;
683
684 for (i = 0; i < q->clhash.hashsize; i++) {
685 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
686 if (arg->count < arg->skip) {
687 arg->count++;
688 continue;
689 }
690 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
691 arg->stop = 1;
692 return;
693 }
694 arg->count++;
695 }
696 }
697}
698
699static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
700 int *qerr)
701{
702 struct qfq_sched *q = qdisc_priv(sch);
703 struct qfq_class *cl;
704 struct tcf_result res;
705 struct tcf_proto *fl;
706 int result;
707
708 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
709 pr_debug("qfq_classify: found %d\n", skb->priority);
710 cl = qfq_find_class(sch, skb->priority);
711 if (cl != NULL)
712 return cl;
713 }
714
715 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
716 fl = rcu_dereference_bh(q->filter_list);
717 result = tc_classify(skb, fl, &res, false);
718 if (result >= 0) {
719#ifdef CONFIG_NET_CLS_ACT
720 switch (result) {
721 case TC_ACT_QUEUED:
722 case TC_ACT_STOLEN:
723 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
724 case TC_ACT_SHOT:
725 return NULL;
726 }
727#endif
728 cl = (struct qfq_class *)res.class;
729 if (cl == NULL)
730 cl = qfq_find_class(sch, res.classid);
731 return cl;
732 }
733
734 return NULL;
735}
736
737
738static inline int qfq_gt(u64 a, u64 b)
739{
740 return (s64)(a - b) > 0;
741}
742
743
744static inline u64 qfq_round_down(u64 ts, unsigned int shift)
745{
746 return ts & ~((1ULL << shift) - 1);
747}
748
749
750static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
751 unsigned long bitmap)
752{
753 int index = __ffs(bitmap);
754 return &q->groups[index];
755}
756
757static inline unsigned long mask_from(unsigned long bitmap, int from)
758{
759 return bitmap & ~((1UL << from) - 1);
760}
761
762
763
764
765
766
767static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
768{
769
770 unsigned int state = qfq_gt(grp->S, q->V);
771 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
772 struct qfq_group *next;
773
774 if (mask) {
775 next = qfq_ffs(q, mask);
776 if (qfq_gt(grp->F, next->F))
777 state |= EB;
778 }
779
780 return state;
781}
782
783
784
785
786
787
788
789
790static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
791 int src, int dst)
792{
793 q->bitmaps[dst] |= q->bitmaps[src] & mask;
794 q->bitmaps[src] &= ~mask;
795}
796
797static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
798{
799 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
800 struct qfq_group *next;
801
802 if (mask) {
803 next = qfq_ffs(q, mask);
804 if (!qfq_gt(next->F, old_F))
805 return;
806 }
807
808 mask = (1UL << index) - 1;
809 qfq_move_groups(q, mask, EB, ER);
810 qfq_move_groups(q, mask, IB, IR);
811}
812
813
814
815
816
817
818
819
820
821
822
823static void qfq_make_eligible(struct qfq_sched *q)
824{
825 unsigned long vslot = q->V >> q->min_slot_shift;
826 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
827
828 if (vslot != old_vslot) {
829 unsigned long mask;
830 int last_flip_pos = fls(vslot ^ old_vslot);
831
832 if (last_flip_pos > 31)
833 mask = ~0UL;
834 else
835 mask = (1UL << last_flip_pos) - 1;
836
837 qfq_move_groups(q, mask, IR, ER);
838 qfq_move_groups(q, mask, IB, EB);
839 }
840}
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
898 u64 roundedS)
899{
900 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
901 unsigned int i;
902
903 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
904 u64 deltaS = roundedS - grp->S -
905 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
906 agg->S -= deltaS;
907 agg->F -= deltaS;
908 slot = QFQ_MAX_SLOTS - 2;
909 }
910
911 i = (grp->front + slot) % QFQ_MAX_SLOTS;
912
913 hlist_add_head(&agg->next, &grp->slots[i]);
914 __set_bit(slot, &grp->full_slots);
915}
916
917
918static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
919{
920 return hlist_entry(grp->slots[grp->front].first,
921 struct qfq_aggregate, next);
922}
923
924
925
926
927static void qfq_front_slot_remove(struct qfq_group *grp)
928{
929 struct qfq_aggregate *agg = qfq_slot_head(grp);
930
931 BUG_ON(!agg);
932 hlist_del(&agg->next);
933 if (hlist_empty(&grp->slots[grp->front]))
934 __clear_bit(0, &grp->full_slots);
935}
936
937
938
939
940
941
942static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
943{
944 unsigned int i;
945
946 pr_debug("qfq slot_scan: grp %u full %#lx\n",
947 grp->index, grp->full_slots);
948
949 if (grp->full_slots == 0)
950 return NULL;
951
952 i = __ffs(grp->full_slots);
953 if (i > 0) {
954 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
955 grp->full_slots >>= i;
956 }
957
958 return qfq_slot_head(grp);
959}
960
961
962
963
964
965
966
967
968
969
970static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
971{
972 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
973
974 grp->full_slots <<= i;
975 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
976}
977
978static void qfq_update_eligible(struct qfq_sched *q)
979{
980 struct qfq_group *grp;
981 unsigned long ineligible;
982
983 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
984 if (ineligible) {
985 if (!q->bitmaps[ER]) {
986 grp = qfq_ffs(q, ineligible);
987 if (qfq_gt(grp->S, q->V))
988 q->V = grp->S;
989 }
990 qfq_make_eligible(q);
991 }
992}
993
994
995static void agg_dequeue(struct qfq_aggregate *agg,
996 struct qfq_class *cl, unsigned int len)
997{
998 qdisc_dequeue_peeked(cl->qdisc);
999
1000 cl->deficit -= (int) len;
1001
1002 if (cl->qdisc->q.qlen == 0)
1003 list_del(&cl->alist);
1004 else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
1005 cl->deficit += agg->lmax;
1006 list_move_tail(&cl->alist, &agg->active);
1007 }
1008}
1009
1010static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
1011 struct qfq_class **cl,
1012 unsigned int *len)
1013{
1014 struct sk_buff *skb;
1015
1016 *cl = list_first_entry(&agg->active, struct qfq_class, alist);
1017 skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
1018 if (skb == NULL)
1019 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
1020 else
1021 *len = qdisc_pkt_len(skb);
1022
1023 return skb;
1024}
1025
1026
1027static inline void charge_actual_service(struct qfq_aggregate *agg)
1028{
1029
1030
1031
1032
1033
1034 u32 service_received = min(agg->budgetmax,
1035 agg->initial_budget - agg->budget);
1036
1037 agg->F = agg->S + (u64)service_received * agg->inv_w;
1038}
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1053{
1054 unsigned long mask;
1055 u64 limit, roundedF;
1056 int slot_shift = agg->grp->slot_shift;
1057
1058 roundedF = qfq_round_down(agg->F, slot_shift);
1059 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1060
1061 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1062
1063 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1064 if (mask) {
1065 struct qfq_group *next = qfq_ffs(q, mask);
1066 if (qfq_gt(roundedF, next->F)) {
1067 if (qfq_gt(limit, next->F))
1068 agg->S = next->F;
1069 else
1070 agg->S = limit;
1071 return;
1072 }
1073 }
1074 agg->S = q->V;
1075 } else
1076 agg->S = agg->F;
1077}
1078
1079
1080
1081
1082
1083
1084static inline void
1085qfq_update_agg_ts(struct qfq_sched *q,
1086 struct qfq_aggregate *agg, enum update_reason reason)
1087{
1088 if (reason != requeue)
1089 qfq_update_start(q, agg);
1090 else
1091 agg->S = agg->F;
1092
1093 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1094}
1095
1096static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1097
1098static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1099{
1100 struct qfq_sched *q = qdisc_priv(sch);
1101 struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
1102 struct qfq_class *cl;
1103 struct sk_buff *skb = NULL;
1104
1105 unsigned int len = 0;
1106
1107 if (in_serv_agg == NULL)
1108 return NULL;
1109
1110 if (!list_empty(&in_serv_agg->active))
1111 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1112
1113
1114
1115
1116
1117
1118 if (len == 0 || in_serv_agg->budget < len) {
1119 charge_actual_service(in_serv_agg);
1120
1121
1122 in_serv_agg->initial_budget = in_serv_agg->budget =
1123 in_serv_agg->budgetmax;
1124
1125 if (!list_empty(&in_serv_agg->active)) {
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136 qfq_update_agg_ts(q, in_serv_agg, requeue);
1137 qfq_schedule_agg(q, in_serv_agg);
1138 } else if (sch->q.qlen == 0) {
1139 q->in_serv_agg = NULL;
1140 return NULL;
1141 }
1142
1143
1144
1145
1146
1147 in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
1148 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1149 }
1150 if (!skb)
1151 return NULL;
1152
1153 sch->q.qlen--;
1154 qdisc_bstats_update(sch, skb);
1155
1156 agg_dequeue(in_serv_agg, cl, len);
1157
1158
1159
1160
1161 if (unlikely(in_serv_agg->budget < len))
1162 in_serv_agg->budget = 0;
1163 else
1164 in_serv_agg->budget -= len;
1165
1166 q->V += (u64)len * q->iwsum;
1167 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1168 len, (unsigned long long) in_serv_agg->F,
1169 (unsigned long long) q->V);
1170
1171 return skb;
1172}
1173
1174static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1175{
1176 struct qfq_group *grp;
1177 struct qfq_aggregate *agg, *new_front_agg;
1178 u64 old_F;
1179
1180 qfq_update_eligible(q);
1181 q->oldV = q->V;
1182
1183 if (!q->bitmaps[ER])
1184 return NULL;
1185
1186 grp = qfq_ffs(q, q->bitmaps[ER]);
1187 old_F = grp->F;
1188
1189 agg = qfq_slot_head(grp);
1190
1191
1192 qfq_front_slot_remove(grp);
1193
1194 new_front_agg = qfq_slot_scan(grp);
1195
1196 if (new_front_agg == NULL)
1197 __clear_bit(grp->index, &q->bitmaps[ER]);
1198 else {
1199 u64 roundedS = qfq_round_down(new_front_agg->S,
1200 grp->slot_shift);
1201 unsigned int s;
1202
1203 if (grp->S == roundedS)
1204 return agg;
1205 grp->S = roundedS;
1206 grp->F = roundedS + (2ULL << grp->slot_shift);
1207 __clear_bit(grp->index, &q->bitmaps[ER]);
1208 s = qfq_calc_state(q, grp);
1209 __set_bit(grp->index, &q->bitmaps[s]);
1210 }
1211
1212 qfq_unblock_groups(q, grp->index, old_F);
1213
1214 return agg;
1215}
1216
1217static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1218{
1219 struct qfq_sched *q = qdisc_priv(sch);
1220 struct qfq_class *cl;
1221 struct qfq_aggregate *agg;
1222 int err = 0;
1223
1224 cl = qfq_classify(skb, sch, &err);
1225 if (cl == NULL) {
1226 if (err & __NET_XMIT_BYPASS)
1227 qdisc_qstats_drop(sch);
1228 kfree_skb(skb);
1229 return err;
1230 }
1231 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
1232
1233 if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) {
1234 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
1235 cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid);
1236 err = qfq_change_agg(sch, cl, cl->agg->class_weight,
1237 qdisc_pkt_len(skb));
1238 if (err)
1239 return err;
1240 }
1241
1242 err = qdisc_enqueue(skb, cl->qdisc);
1243 if (unlikely(err != NET_XMIT_SUCCESS)) {
1244 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
1245 if (net_xmit_drop_count(err)) {
1246 cl->qstats.drops++;
1247 qdisc_qstats_drop(sch);
1248 }
1249 return err;
1250 }
1251
1252 bstats_update(&cl->bstats, skb);
1253 ++sch->q.qlen;
1254
1255 agg = cl->agg;
1256
1257 if (cl->qdisc->q.qlen != 1) {
1258 if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
1259 list_first_entry(&agg->active, struct qfq_class, alist)
1260 == cl && cl->deficit < qdisc_pkt_len(skb))
1261 list_move_tail(&cl->alist, &agg->active);
1262
1263 return err;
1264 }
1265
1266
1267 cl->deficit = agg->lmax;
1268 list_add_tail(&cl->alist, &agg->active);
1269
1270 if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
1271 q->in_serv_agg == agg)
1272 return err;
1273
1274 qfq_activate_agg(q, agg, enqueue);
1275
1276 return err;
1277}
1278
1279
1280
1281
1282static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1283{
1284 struct qfq_group *grp = agg->grp;
1285 u64 roundedS;
1286 int s;
1287
1288 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299 if (grp->full_slots) {
1300 if (!qfq_gt(grp->S, agg->S))
1301 goto skip_update;
1302
1303
1304 qfq_slot_rotate(grp, roundedS);
1305
1306 __clear_bit(grp->index, &q->bitmaps[IR]);
1307 __clear_bit(grp->index, &q->bitmaps[IB]);
1308 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
1309 q->in_serv_agg == NULL)
1310 q->V = roundedS;
1311
1312 grp->S = roundedS;
1313 grp->F = roundedS + (2ULL << grp->slot_shift);
1314 s = qfq_calc_state(q, grp);
1315 __set_bit(grp->index, &q->bitmaps[s]);
1316
1317 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
1318 s, q->bitmaps[s],
1319 (unsigned long long) agg->S,
1320 (unsigned long long) agg->F,
1321 (unsigned long long) q->V);
1322
1323skip_update:
1324 qfq_slot_insert(grp, agg, roundedS);
1325}
1326
1327
1328
1329static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
1330 enum update_reason reason)
1331{
1332 agg->initial_budget = agg->budget = agg->budgetmax;
1333
1334 qfq_update_agg_ts(q, agg, reason);
1335 if (q->in_serv_agg == NULL) {
1336 q->in_serv_agg = agg;
1337
1338 q->oldV = q->V = agg->S;
1339 } else if (agg != q->in_serv_agg)
1340 qfq_schedule_agg(q, agg);
1341}
1342
1343static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
1344 struct qfq_aggregate *agg)
1345{
1346 unsigned int i, offset;
1347 u64 roundedS;
1348
1349 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1350 offset = (roundedS - grp->S) >> grp->slot_shift;
1351
1352 i = (grp->front + offset) % QFQ_MAX_SLOTS;
1353
1354 hlist_del(&agg->next);
1355 if (hlist_empty(&grp->slots[i]))
1356 __clear_bit(offset, &grp->full_slots);
1357}
1358
1359
1360
1361
1362
1363
1364
1365
1366static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1367{
1368 struct qfq_group *grp = agg->grp;
1369 unsigned long mask;
1370 u64 roundedS;
1371 int s;
1372
1373 if (agg == q->in_serv_agg) {
1374 charge_actual_service(agg);
1375 q->in_serv_agg = qfq_choose_next_agg(q);
1376 return;
1377 }
1378
1379 agg->F = agg->S;
1380 qfq_slot_remove(q, grp, agg);
1381
1382 if (!grp->full_slots) {
1383 __clear_bit(grp->index, &q->bitmaps[IR]);
1384 __clear_bit(grp->index, &q->bitmaps[EB]);
1385 __clear_bit(grp->index, &q->bitmaps[IB]);
1386
1387 if (test_bit(grp->index, &q->bitmaps[ER]) &&
1388 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
1389 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
1390 if (mask)
1391 mask = ~((1UL << __fls(mask)) - 1);
1392 else
1393 mask = ~0UL;
1394 qfq_move_groups(q, mask, EB, ER);
1395 qfq_move_groups(q, mask, IB, IR);
1396 }
1397 __clear_bit(grp->index, &q->bitmaps[ER]);
1398 } else if (hlist_empty(&grp->slots[grp->front])) {
1399 agg = qfq_slot_scan(grp);
1400 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1401 if (grp->S != roundedS) {
1402 __clear_bit(grp->index, &q->bitmaps[ER]);
1403 __clear_bit(grp->index, &q->bitmaps[IR]);
1404 __clear_bit(grp->index, &q->bitmaps[EB]);
1405 __clear_bit(grp->index, &q->bitmaps[IB]);
1406 grp->S = roundedS;
1407 grp->F = roundedS + (2ULL << grp->slot_shift);
1408 s = qfq_calc_state(q, grp);
1409 __set_bit(grp->index, &q->bitmaps[s]);
1410 }
1411 }
1412}
1413
1414static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1415{
1416 struct qfq_sched *q = qdisc_priv(sch);
1417 struct qfq_class *cl = (struct qfq_class *)arg;
1418
1419 if (cl->qdisc->q.qlen == 0)
1420 qfq_deactivate_class(q, cl);
1421}
1422
1423static unsigned int qfq_drop_from_slot(struct qfq_sched *q,
1424 struct hlist_head *slot)
1425{
1426 struct qfq_aggregate *agg;
1427 struct qfq_class *cl;
1428 unsigned int len;
1429
1430 hlist_for_each_entry(agg, slot, next) {
1431 list_for_each_entry(cl, &agg->active, alist) {
1432
1433 if (!cl->qdisc->ops->drop)
1434 continue;
1435
1436 len = cl->qdisc->ops->drop(cl->qdisc);
1437 if (len > 0) {
1438 if (cl->qdisc->q.qlen == 0)
1439 qfq_deactivate_class(q, cl);
1440
1441 return len;
1442 }
1443 }
1444 }
1445 return 0;
1446}
1447
1448static unsigned int qfq_drop(struct Qdisc *sch)
1449{
1450 struct qfq_sched *q = qdisc_priv(sch);
1451 struct qfq_group *grp;
1452 unsigned int i, j, len;
1453
1454 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1455 grp = &q->groups[i];
1456 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1457 len = qfq_drop_from_slot(q, &grp->slots[j]);
1458 if (len > 0) {
1459 sch->q.qlen--;
1460 return len;
1461 }
1462 }
1463
1464 }
1465
1466 return 0;
1467}
1468
1469static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1470{
1471 struct qfq_sched *q = qdisc_priv(sch);
1472 struct qfq_group *grp;
1473 int i, j, err;
1474 u32 max_cl_shift, maxbudg_shift, max_classes;
1475
1476 err = qdisc_class_hash_init(&q->clhash);
1477 if (err < 0)
1478 return err;
1479
1480 if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
1481 max_classes = QFQ_MAX_AGG_CLASSES;
1482 else
1483 max_classes = qdisc_dev(sch)->tx_queue_len + 1;
1484
1485 max_cl_shift = __fls(max_classes);
1486 q->max_agg_classes = 1<<max_cl_shift;
1487
1488
1489 maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
1490 q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
1491
1492 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1493 grp = &q->groups[i];
1494 grp->index = i;
1495 grp->slot_shift = q->min_slot_shift + i;
1496 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1497 INIT_HLIST_HEAD(&grp->slots[j]);
1498 }
1499
1500 INIT_HLIST_HEAD(&q->nonfull_aggs);
1501
1502 return 0;
1503}
1504
1505static void qfq_reset_qdisc(struct Qdisc *sch)
1506{
1507 struct qfq_sched *q = qdisc_priv(sch);
1508 struct qfq_class *cl;
1509 unsigned int i;
1510
1511 for (i = 0; i < q->clhash.hashsize; i++) {
1512 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1513 if (cl->qdisc->q.qlen > 0)
1514 qfq_deactivate_class(q, cl);
1515
1516 qdisc_reset(cl->qdisc);
1517 }
1518 }
1519 sch->q.qlen = 0;
1520}
1521
1522static void qfq_destroy_qdisc(struct Qdisc *sch)
1523{
1524 struct qfq_sched *q = qdisc_priv(sch);
1525 struct qfq_class *cl;
1526 struct hlist_node *next;
1527 unsigned int i;
1528
1529 tcf_destroy_chain(&q->filter_list);
1530
1531 for (i = 0; i < q->clhash.hashsize; i++) {
1532 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1533 common.hnode) {
1534 qfq_destroy_class(sch, cl);
1535 }
1536 }
1537 qdisc_class_hash_destroy(&q->clhash);
1538}
1539
1540static const struct Qdisc_class_ops qfq_class_ops = {
1541 .change = qfq_change_class,
1542 .delete = qfq_delete_class,
1543 .get = qfq_get_class,
1544 .put = qfq_put_class,
1545 .tcf_chain = qfq_tcf_chain,
1546 .bind_tcf = qfq_bind_tcf,
1547 .unbind_tcf = qfq_unbind_tcf,
1548 .graft = qfq_graft_class,
1549 .leaf = qfq_class_leaf,
1550 .qlen_notify = qfq_qlen_notify,
1551 .dump = qfq_dump_class,
1552 .dump_stats = qfq_dump_class_stats,
1553 .walk = qfq_walk,
1554};
1555
1556static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1557 .cl_ops = &qfq_class_ops,
1558 .id = "qfq",
1559 .priv_size = sizeof(struct qfq_sched),
1560 .enqueue = qfq_enqueue,
1561 .dequeue = qfq_dequeue,
1562 .peek = qdisc_peek_dequeued,
1563 .drop = qfq_drop,
1564 .init = qfq_init_qdisc,
1565 .reset = qfq_reset_qdisc,
1566 .destroy = qfq_destroy_qdisc,
1567 .owner = THIS_MODULE,
1568};
1569
1570static int __init qfq_init(void)
1571{
1572 return register_qdisc(&qfq_qdisc_ops);
1573}
1574
1575static void __exit qfq_exit(void)
1576{
1577 unregister_qdisc(&qfq_qdisc_ops);
1578}
1579
1580module_init(qfq_init);
1581module_exit(qfq_exit);
1582MODULE_LICENSE("GPL");
1583