1
2
3
4
5
6
7
8
9
10
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/bitops.h>
15#include <linux/errno.h>
16#include <linux/netdevice.h>
17#include <linux/pkt_sched.h>
18#include <net/sch_generic.h>
19#include <net/pkt_sched.h>
20#include <net/pkt_cls.h>
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97#define QFQ_MAX_SLOTS 32
98
99
100
101
102
103
104
105
106
107
108#define QFQ_MAX_INDEX 24
109#define QFQ_MAX_WSHIFT 10
110
111#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
112#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
113
114#define FRAC_BITS 30
115#define ONE_FP (1UL << FRAC_BITS)
116
117#define QFQ_MTU_SHIFT 16
118#define QFQ_MIN_LMAX 512
119
120#define QFQ_MAX_AGG_CLASSES 8
121
122
123
124
125
126enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
127
128struct qfq_group;
129
130struct qfq_aggregate;
131
132struct qfq_class {
133 struct Qdisc_class_common common;
134
135 unsigned int refcnt;
136 unsigned int filter_cnt;
137
138 struct gnet_stats_basic_packed bstats;
139 struct gnet_stats_queue qstats;
140 struct gnet_stats_rate_est64 rate_est;
141 struct Qdisc *qdisc;
142 struct list_head alist;
143 struct qfq_aggregate *agg;
144 int deficit;
145};
146
147struct qfq_aggregate {
148 struct hlist_node next;
149 u64 S, F;
150
151
152
153
154
155 struct qfq_group *grp;
156
157
158 u32 class_weight;
159
160 int lmax;
161
162 u32 inv_w;
163 u32 budgetmax;
164 u32 initial_budget, budget;
165
166 int num_classes;
167 struct list_head active;
168
169 struct hlist_node nonfull_next;
170};
171
172struct qfq_group {
173 u64 S, F;
174 unsigned int slot_shift;
175 unsigned int index;
176 unsigned int front;
177 unsigned long full_slots;
178
179
180 struct hlist_head slots[QFQ_MAX_SLOTS];
181};
182
183struct qfq_sched {
184 struct tcf_proto __rcu *filter_list;
185 struct Qdisc_class_hash clhash;
186
187 u64 oldV, V;
188 struct qfq_aggregate *in_serv_agg;
189 u32 wsum;
190 u32 iwsum;
191
192 unsigned long bitmaps[QFQ_MAX_STATE];
193 struct qfq_group groups[QFQ_MAX_INDEX + 1];
194 u32 min_slot_shift;
195
196 u32 max_agg_classes;
197 struct hlist_head nonfull_aggs;
198};
199
200
201
202
203
204
205
206
207enum update_reason {enqueue, requeue};
208
209static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
210{
211 struct qfq_sched *q = qdisc_priv(sch);
212 struct Qdisc_class_common *clc;
213
214 clc = qdisc_class_find(&q->clhash, classid);
215 if (clc == NULL)
216 return NULL;
217 return container_of(clc, struct qfq_class, common);
218}
219
220static void qfq_purge_queue(struct qfq_class *cl)
221{
222 unsigned int len = cl->qdisc->q.qlen;
223 unsigned int backlog = cl->qdisc->qstats.backlog;
224
225 qdisc_reset(cl->qdisc);
226 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
227}
228
229static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
230 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
231 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
232};
233
234
235
236
237
238
239static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
240{
241 u64 slot_size = (u64)maxlen * inv_w;
242 unsigned long size_map;
243 int index = 0;
244
245 size_map = slot_size >> min_slot_shift;
246 if (!size_map)
247 goto out;
248
249 index = __fls(size_map) + 1;
250 index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
251
252 if (index < 0)
253 index = 0;
254out:
255 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
256 (unsigned long) ONE_FP/inv_w, maxlen, index);
257
258 return index;
259}
260
261static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
262static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
263 enum update_reason);
264
265static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
266 u32 lmax, u32 weight)
267{
268 INIT_LIST_HEAD(&agg->active);
269 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
270
271 agg->lmax = lmax;
272 agg->class_weight = weight;
273}
274
275static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
276 u32 lmax, u32 weight)
277{
278 struct qfq_aggregate *agg;
279
280 hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
281 if (agg->lmax == lmax && agg->class_weight == weight)
282 return agg;
283
284 return NULL;
285}
286
287
288
289static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
290 int new_num_classes)
291{
292 u32 new_agg_weight;
293
294 if (new_num_classes == q->max_agg_classes)
295 hlist_del_init(&agg->nonfull_next);
296
297 if (agg->num_classes > new_num_classes &&
298 new_num_classes == q->max_agg_classes - 1)
299 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
300
301
302
303
304
305 agg->budgetmax = new_num_classes * agg->lmax;
306 new_agg_weight = agg->class_weight * new_num_classes;
307 agg->inv_w = ONE_FP/new_agg_weight;
308
309 if (agg->grp == NULL) {
310 int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
311 q->min_slot_shift);
312 agg->grp = &q->groups[i];
313 }
314
315 q->wsum +=
316 (int) agg->class_weight * (new_num_classes - agg->num_classes);
317 q->iwsum = ONE_FP / q->wsum;
318
319 agg->num_classes = new_num_classes;
320}
321
322
323static void qfq_add_to_agg(struct qfq_sched *q,
324 struct qfq_aggregate *agg,
325 struct qfq_class *cl)
326{
327 cl->agg = agg;
328
329 qfq_update_agg(q, agg, agg->num_classes+1);
330 if (cl->qdisc->q.qlen > 0) {
331 list_add_tail(&cl->alist, &agg->active);
332 if (list_first_entry(&agg->active, struct qfq_class, alist) ==
333 cl && q->in_serv_agg != agg)
334 qfq_activate_agg(q, agg, enqueue);
335 }
336}
337
338static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
339
340static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
341{
342 hlist_del_init(&agg->nonfull_next);
343 q->wsum -= agg->class_weight;
344 if (q->wsum != 0)
345 q->iwsum = ONE_FP / q->wsum;
346
347 if (q->in_serv_agg == agg)
348 q->in_serv_agg = qfq_choose_next_agg(q);
349 kfree(agg);
350}
351
352
353static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
354{
355 struct qfq_aggregate *agg = cl->agg;
356
357
358 list_del(&cl->alist);
359 if (list_empty(&agg->active))
360 qfq_deactivate_agg(q, agg);
361}
362
363
364static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
365{
366 struct qfq_aggregate *agg = cl->agg;
367
368 cl->agg = NULL;
369 if (agg->num_classes == 1) {
370 qfq_destroy_agg(q, agg);
371 return;
372 }
373 qfq_update_agg(q, agg, agg->num_classes-1);
374}
375
376
377static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
378{
379 if (cl->qdisc->q.qlen > 0)
380 qfq_deactivate_class(q, cl);
381
382 qfq_rm_from_agg(q, cl);
383}
384
385
386static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
387 u32 lmax)
388{
389 struct qfq_sched *q = qdisc_priv(sch);
390 struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
391
392 if (new_agg == NULL) {
393 new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
394 if (new_agg == NULL)
395 return -ENOBUFS;
396 qfq_init_agg(q, new_agg, lmax, weight);
397 }
398 qfq_deact_rm_from_agg(q, cl);
399 qfq_add_to_agg(q, new_agg, cl);
400
401 return 0;
402}
403
404static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
405 struct nlattr **tca, unsigned long *arg)
406{
407 struct qfq_sched *q = qdisc_priv(sch);
408 struct qfq_class *cl = (struct qfq_class *)*arg;
409 bool existing = false;
410 struct nlattr *tb[TCA_QFQ_MAX + 1];
411 struct qfq_aggregate *new_agg = NULL;
412 u32 weight, lmax, inv_w;
413 int err;
414 int delta_w;
415
416 if (tca[TCA_OPTIONS] == NULL) {
417 pr_notice("qfq: no options\n");
418 return -EINVAL;
419 }
420
421 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
422 if (err < 0)
423 return err;
424
425 if (tb[TCA_QFQ_WEIGHT]) {
426 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
427 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
428 pr_notice("qfq: invalid weight %u\n", weight);
429 return -EINVAL;
430 }
431 } else
432 weight = 1;
433
434 if (tb[TCA_QFQ_LMAX]) {
435 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
436 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
437 pr_notice("qfq: invalid max length %u\n", lmax);
438 return -EINVAL;
439 }
440 } else
441 lmax = psched_mtu(qdisc_dev(sch));
442
443 inv_w = ONE_FP / weight;
444 weight = ONE_FP / inv_w;
445
446 if (cl != NULL &&
447 lmax == cl->agg->lmax &&
448 weight == cl->agg->class_weight)
449 return 0;
450
451 delta_w = weight - (cl ? cl->agg->class_weight : 0);
452
453 if (q->wsum + delta_w > QFQ_MAX_WSUM) {
454 pr_notice("qfq: total weight out of range (%d + %u)\n",
455 delta_w, q->wsum);
456 return -EINVAL;
457 }
458
459 if (cl != NULL) {
460 if (tca[TCA_RATE]) {
461 err = gen_replace_estimator(&cl->bstats, NULL,
462 &cl->rate_est,
463 NULL,
464 qdisc_root_sleeping_running(sch),
465 tca[TCA_RATE]);
466 if (err)
467 return err;
468 }
469 existing = true;
470 goto set_change_agg;
471 }
472
473
474 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
475 if (cl == NULL)
476 return -ENOBUFS;
477
478 cl->refcnt = 1;
479 cl->common.classid = classid;
480 cl->deficit = lmax;
481
482 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
483 &pfifo_qdisc_ops, classid);
484 if (cl->qdisc == NULL)
485 cl->qdisc = &noop_qdisc;
486
487 if (tca[TCA_RATE]) {
488 err = gen_new_estimator(&cl->bstats, NULL,
489 &cl->rate_est,
490 NULL,
491 qdisc_root_sleeping_running(sch),
492 tca[TCA_RATE]);
493 if (err)
494 goto destroy_class;
495 }
496
497 sch_tree_lock(sch);
498 qdisc_class_hash_insert(&q->clhash, &cl->common);
499 sch_tree_unlock(sch);
500
501 qdisc_class_hash_grow(sch, &q->clhash);
502
503set_change_agg:
504 sch_tree_lock(sch);
505 new_agg = qfq_find_agg(q, lmax, weight);
506 if (new_agg == NULL) {
507 sch_tree_unlock(sch);
508 new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
509 if (new_agg == NULL) {
510 err = -ENOBUFS;
511 gen_kill_estimator(&cl->bstats, &cl->rate_est);
512 goto destroy_class;
513 }
514 sch_tree_lock(sch);
515 qfq_init_agg(q, new_agg, lmax, weight);
516 }
517 if (existing)
518 qfq_deact_rm_from_agg(q, cl);
519 qfq_add_to_agg(q, new_agg, cl);
520 sch_tree_unlock(sch);
521
522 *arg = (unsigned long)cl;
523 return 0;
524
525destroy_class:
526 qdisc_destroy(cl->qdisc);
527 kfree(cl);
528 return err;
529}
530
531static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
532{
533 struct qfq_sched *q = qdisc_priv(sch);
534
535 qfq_rm_from_agg(q, cl);
536 gen_kill_estimator(&cl->bstats, &cl->rate_est);
537 qdisc_destroy(cl->qdisc);
538 kfree(cl);
539}
540
541static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
542{
543 struct qfq_sched *q = qdisc_priv(sch);
544 struct qfq_class *cl = (struct qfq_class *)arg;
545
546 if (cl->filter_cnt > 0)
547 return -EBUSY;
548
549 sch_tree_lock(sch);
550
551 qfq_purge_queue(cl);
552 qdisc_class_hash_remove(&q->clhash, &cl->common);
553
554 BUG_ON(--cl->refcnt == 0);
555
556
557
558
559
560 sch_tree_unlock(sch);
561 return 0;
562}
563
564static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
565{
566 struct qfq_class *cl = qfq_find_class(sch, classid);
567
568 if (cl != NULL)
569 cl->refcnt++;
570
571 return (unsigned long)cl;
572}
573
574static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
575{
576 struct qfq_class *cl = (struct qfq_class *)arg;
577
578 if (--cl->refcnt == 0)
579 qfq_destroy_class(sch, cl);
580}
581
582static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
583 unsigned long cl)
584{
585 struct qfq_sched *q = qdisc_priv(sch);
586
587 if (cl)
588 return NULL;
589
590 return &q->filter_list;
591}
592
593static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
594 u32 classid)
595{
596 struct qfq_class *cl = qfq_find_class(sch, classid);
597
598 if (cl != NULL)
599 cl->filter_cnt++;
600
601 return (unsigned long)cl;
602}
603
604static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
605{
606 struct qfq_class *cl = (struct qfq_class *)arg;
607
608 cl->filter_cnt--;
609}
610
611static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
612 struct Qdisc *new, struct Qdisc **old)
613{
614 struct qfq_class *cl = (struct qfq_class *)arg;
615
616 if (new == NULL) {
617 new = qdisc_create_dflt(sch->dev_queue,
618 &pfifo_qdisc_ops, cl->common.classid);
619 if (new == NULL)
620 new = &noop_qdisc;
621 }
622
623 *old = qdisc_replace(sch, new, &cl->qdisc);
624 return 0;
625}
626
627static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
628{
629 struct qfq_class *cl = (struct qfq_class *)arg;
630
631 return cl->qdisc;
632}
633
634static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
635 struct sk_buff *skb, struct tcmsg *tcm)
636{
637 struct qfq_class *cl = (struct qfq_class *)arg;
638 struct nlattr *nest;
639
640 tcm->tcm_parent = TC_H_ROOT;
641 tcm->tcm_handle = cl->common.classid;
642 tcm->tcm_info = cl->qdisc->handle;
643
644 nest = nla_nest_start(skb, TCA_OPTIONS);
645 if (nest == NULL)
646 goto nla_put_failure;
647 if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
648 nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
649 goto nla_put_failure;
650 return nla_nest_end(skb, nest);
651
652nla_put_failure:
653 nla_nest_cancel(skb, nest);
654 return -EMSGSIZE;
655}
656
657static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
658 struct gnet_dump *d)
659{
660 struct qfq_class *cl = (struct qfq_class *)arg;
661 struct tc_qfq_stats xstats;
662
663 memset(&xstats, 0, sizeof(xstats));
664
665 xstats.weight = cl->agg->class_weight;
666 xstats.lmax = cl->agg->lmax;
667
668 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
669 d, NULL, &cl->bstats) < 0 ||
670 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
671 gnet_stats_copy_queue(d, NULL,
672 &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
673 return -1;
674
675 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
676}
677
678static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
679{
680 struct qfq_sched *q = qdisc_priv(sch);
681 struct qfq_class *cl;
682 unsigned int i;
683
684 if (arg->stop)
685 return;
686
687 for (i = 0; i < q->clhash.hashsize; i++) {
688 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
689 if (arg->count < arg->skip) {
690 arg->count++;
691 continue;
692 }
693 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
694 arg->stop = 1;
695 return;
696 }
697 arg->count++;
698 }
699 }
700}
701
702static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
703 int *qerr)
704{
705 struct qfq_sched *q = qdisc_priv(sch);
706 struct qfq_class *cl;
707 struct tcf_result res;
708 struct tcf_proto *fl;
709 int result;
710
711 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
712 pr_debug("qfq_classify: found %d\n", skb->priority);
713 cl = qfq_find_class(sch, skb->priority);
714 if (cl != NULL)
715 return cl;
716 }
717
718 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
719 fl = rcu_dereference_bh(q->filter_list);
720 result = tc_classify(skb, fl, &res, false);
721 if (result >= 0) {
722#ifdef CONFIG_NET_CLS_ACT
723 switch (result) {
724 case TC_ACT_QUEUED:
725 case TC_ACT_STOLEN:
726 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
727 case TC_ACT_SHOT:
728 return NULL;
729 }
730#endif
731 cl = (struct qfq_class *)res.class;
732 if (cl == NULL)
733 cl = qfq_find_class(sch, res.classid);
734 return cl;
735 }
736
737 return NULL;
738}
739
740
741static inline int qfq_gt(u64 a, u64 b)
742{
743 return (s64)(a - b) > 0;
744}
745
746
747static inline u64 qfq_round_down(u64 ts, unsigned int shift)
748{
749 return ts & ~((1ULL << shift) - 1);
750}
751
752
753static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
754 unsigned long bitmap)
755{
756 int index = __ffs(bitmap);
757 return &q->groups[index];
758}
759
760static inline unsigned long mask_from(unsigned long bitmap, int from)
761{
762 return bitmap & ~((1UL << from) - 1);
763}
764
765
766
767
768
769
770static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
771{
772
773 unsigned int state = qfq_gt(grp->S, q->V);
774 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
775 struct qfq_group *next;
776
777 if (mask) {
778 next = qfq_ffs(q, mask);
779 if (qfq_gt(grp->F, next->F))
780 state |= EB;
781 }
782
783 return state;
784}
785
786
787
788
789
790
791
792
793static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
794 int src, int dst)
795{
796 q->bitmaps[dst] |= q->bitmaps[src] & mask;
797 q->bitmaps[src] &= ~mask;
798}
799
800static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
801{
802 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
803 struct qfq_group *next;
804
805 if (mask) {
806 next = qfq_ffs(q, mask);
807 if (!qfq_gt(next->F, old_F))
808 return;
809 }
810
811 mask = (1UL << index) - 1;
812 qfq_move_groups(q, mask, EB, ER);
813 qfq_move_groups(q, mask, IB, IR);
814}
815
816
817
818
819
820
821
822
823
824
825
826static void qfq_make_eligible(struct qfq_sched *q)
827{
828 unsigned long vslot = q->V >> q->min_slot_shift;
829 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
830
831 if (vslot != old_vslot) {
832 unsigned long mask;
833 int last_flip_pos = fls(vslot ^ old_vslot);
834
835 if (last_flip_pos > 31)
836 mask = ~0UL;
837 else
838 mask = (1UL << last_flip_pos) - 1;
839
840 qfq_move_groups(q, mask, IR, ER);
841 qfq_move_groups(q, mask, IB, EB);
842 }
843}
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
901 u64 roundedS)
902{
903 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
904 unsigned int i;
905
906 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
907 u64 deltaS = roundedS - grp->S -
908 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
909 agg->S -= deltaS;
910 agg->F -= deltaS;
911 slot = QFQ_MAX_SLOTS - 2;
912 }
913
914 i = (grp->front + slot) % QFQ_MAX_SLOTS;
915
916 hlist_add_head(&agg->next, &grp->slots[i]);
917 __set_bit(slot, &grp->full_slots);
918}
919
920
921static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
922{
923 return hlist_entry(grp->slots[grp->front].first,
924 struct qfq_aggregate, next);
925}
926
927
928
929
930static void qfq_front_slot_remove(struct qfq_group *grp)
931{
932 struct qfq_aggregate *agg = qfq_slot_head(grp);
933
934 BUG_ON(!agg);
935 hlist_del(&agg->next);
936 if (hlist_empty(&grp->slots[grp->front]))
937 __clear_bit(0, &grp->full_slots);
938}
939
940
941
942
943
944
945static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
946{
947 unsigned int i;
948
949 pr_debug("qfq slot_scan: grp %u full %#lx\n",
950 grp->index, grp->full_slots);
951
952 if (grp->full_slots == 0)
953 return NULL;
954
955 i = __ffs(grp->full_slots);
956 if (i > 0) {
957 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
958 grp->full_slots >>= i;
959 }
960
961 return qfq_slot_head(grp);
962}
963
964
965
966
967
968
969
970
971
972
973static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
974{
975 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
976
977 grp->full_slots <<= i;
978 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
979}
980
981static void qfq_update_eligible(struct qfq_sched *q)
982{
983 struct qfq_group *grp;
984 unsigned long ineligible;
985
986 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
987 if (ineligible) {
988 if (!q->bitmaps[ER]) {
989 grp = qfq_ffs(q, ineligible);
990 if (qfq_gt(grp->S, q->V))
991 q->V = grp->S;
992 }
993 qfq_make_eligible(q);
994 }
995}
996
997
998static void agg_dequeue(struct qfq_aggregate *agg,
999 struct qfq_class *cl, unsigned int len)
1000{
1001 qdisc_dequeue_peeked(cl->qdisc);
1002
1003 cl->deficit -= (int) len;
1004
1005 if (cl->qdisc->q.qlen == 0)
1006 list_del(&cl->alist);
1007 else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
1008 cl->deficit += agg->lmax;
1009 list_move_tail(&cl->alist, &agg->active);
1010 }
1011}
1012
1013static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
1014 struct qfq_class **cl,
1015 unsigned int *len)
1016{
1017 struct sk_buff *skb;
1018
1019 *cl = list_first_entry(&agg->active, struct qfq_class, alist);
1020 skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
1021 if (skb == NULL)
1022 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
1023 else
1024 *len = qdisc_pkt_len(skb);
1025
1026 return skb;
1027}
1028
1029
1030static inline void charge_actual_service(struct qfq_aggregate *agg)
1031{
1032
1033
1034
1035
1036
1037 u32 service_received = min(agg->budgetmax,
1038 agg->initial_budget - agg->budget);
1039
1040 agg->F = agg->S + (u64)service_received * agg->inv_w;
1041}
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
1056{
1057 unsigned long mask;
1058 u64 limit, roundedF;
1059 int slot_shift = agg->grp->slot_shift;
1060
1061 roundedF = qfq_round_down(agg->F, slot_shift);
1062 limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
1063
1064 if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
1065
1066 mask = mask_from(q->bitmaps[ER], agg->grp->index);
1067 if (mask) {
1068 struct qfq_group *next = qfq_ffs(q, mask);
1069 if (qfq_gt(roundedF, next->F)) {
1070 if (qfq_gt(limit, next->F))
1071 agg->S = next->F;
1072 else
1073 agg->S = limit;
1074 return;
1075 }
1076 }
1077 agg->S = q->V;
1078 } else
1079 agg->S = agg->F;
1080}
1081
1082
1083
1084
1085
1086
1087static inline void
1088qfq_update_agg_ts(struct qfq_sched *q,
1089 struct qfq_aggregate *agg, enum update_reason reason)
1090{
1091 if (reason != requeue)
1092 qfq_update_start(q, agg);
1093 else
1094 agg->S = agg->F;
1095
1096 agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
1097}
1098
1099static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1100
1101static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1102{
1103 struct qfq_sched *q = qdisc_priv(sch);
1104 struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
1105 struct qfq_class *cl;
1106 struct sk_buff *skb = NULL;
1107
1108 unsigned int len = 0;
1109
1110 if (in_serv_agg == NULL)
1111 return NULL;
1112
1113 if (!list_empty(&in_serv_agg->active))
1114 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1115
1116
1117
1118
1119
1120
1121 if (len == 0 || in_serv_agg->budget < len) {
1122 charge_actual_service(in_serv_agg);
1123
1124
1125 in_serv_agg->initial_budget = in_serv_agg->budget =
1126 in_serv_agg->budgetmax;
1127
1128 if (!list_empty(&in_serv_agg->active)) {
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139 qfq_update_agg_ts(q, in_serv_agg, requeue);
1140 qfq_schedule_agg(q, in_serv_agg);
1141 } else if (sch->q.qlen == 0) {
1142 q->in_serv_agg = NULL;
1143 return NULL;
1144 }
1145
1146
1147
1148
1149
1150 in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
1151 skb = qfq_peek_skb(in_serv_agg, &cl, &len);
1152 }
1153 if (!skb)
1154 return NULL;
1155
1156 qdisc_qstats_backlog_dec(sch, skb);
1157 sch->q.qlen--;
1158 qdisc_bstats_update(sch, skb);
1159
1160 agg_dequeue(in_serv_agg, cl, len);
1161
1162
1163
1164
1165 if (unlikely(in_serv_agg->budget < len))
1166 in_serv_agg->budget = 0;
1167 else
1168 in_serv_agg->budget -= len;
1169
1170 q->V += (u64)len * q->iwsum;
1171 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1172 len, (unsigned long long) in_serv_agg->F,
1173 (unsigned long long) q->V);
1174
1175 return skb;
1176}
1177
1178static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
1179{
1180 struct qfq_group *grp;
1181 struct qfq_aggregate *agg, *new_front_agg;
1182 u64 old_F;
1183
1184 qfq_update_eligible(q);
1185 q->oldV = q->V;
1186
1187 if (!q->bitmaps[ER])
1188 return NULL;
1189
1190 grp = qfq_ffs(q, q->bitmaps[ER]);
1191 old_F = grp->F;
1192
1193 agg = qfq_slot_head(grp);
1194
1195
1196 qfq_front_slot_remove(grp);
1197
1198 new_front_agg = qfq_slot_scan(grp);
1199
1200 if (new_front_agg == NULL)
1201 __clear_bit(grp->index, &q->bitmaps[ER]);
1202 else {
1203 u64 roundedS = qfq_round_down(new_front_agg->S,
1204 grp->slot_shift);
1205 unsigned int s;
1206
1207 if (grp->S == roundedS)
1208 return agg;
1209 grp->S = roundedS;
1210 grp->F = roundedS + (2ULL << grp->slot_shift);
1211 __clear_bit(grp->index, &q->bitmaps[ER]);
1212 s = qfq_calc_state(q, grp);
1213 __set_bit(grp->index, &q->bitmaps[s]);
1214 }
1215
1216 qfq_unblock_groups(q, grp->index, old_F);
1217
1218 return agg;
1219}
1220
1221static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1222 struct sk_buff **to_free)
1223{
1224 struct qfq_sched *q = qdisc_priv(sch);
1225 struct qfq_class *cl;
1226 struct qfq_aggregate *agg;
1227 int err = 0;
1228
1229 cl = qfq_classify(skb, sch, &err);
1230 if (cl == NULL) {
1231 if (err & __NET_XMIT_BYPASS)
1232 qdisc_qstats_drop(sch);
1233 kfree_skb(skb);
1234 return err;
1235 }
1236 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
1237
1238 if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) {
1239 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
1240 cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid);
1241 err = qfq_change_agg(sch, cl, cl->agg->class_weight,
1242 qdisc_pkt_len(skb));
1243 if (err) {
1244 cl->qstats.drops++;
1245 return qdisc_drop(skb, sch, to_free);
1246 }
1247 }
1248
1249 err = qdisc_enqueue(skb, cl->qdisc, to_free);
1250 if (unlikely(err != NET_XMIT_SUCCESS)) {
1251 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
1252 if (net_xmit_drop_count(err)) {
1253 cl->qstats.drops++;
1254 qdisc_qstats_drop(sch);
1255 }
1256 return err;
1257 }
1258
1259 bstats_update(&cl->bstats, skb);
1260 qdisc_qstats_backlog_inc(sch, skb);
1261 ++sch->q.qlen;
1262
1263 agg = cl->agg;
1264
1265 if (cl->qdisc->q.qlen != 1) {
1266 if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
1267 list_first_entry(&agg->active, struct qfq_class, alist)
1268 == cl && cl->deficit < qdisc_pkt_len(skb))
1269 list_move_tail(&cl->alist, &agg->active);
1270
1271 return err;
1272 }
1273
1274
1275 cl->deficit = agg->lmax;
1276 list_add_tail(&cl->alist, &agg->active);
1277
1278 if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
1279 q->in_serv_agg == agg)
1280 return err;
1281
1282 qfq_activate_agg(q, agg, enqueue);
1283
1284 return err;
1285}
1286
1287
1288
1289
1290static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1291{
1292 struct qfq_group *grp = agg->grp;
1293 u64 roundedS;
1294 int s;
1295
1296 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307 if (grp->full_slots) {
1308 if (!qfq_gt(grp->S, agg->S))
1309 goto skip_update;
1310
1311
1312 qfq_slot_rotate(grp, roundedS);
1313
1314 __clear_bit(grp->index, &q->bitmaps[IR]);
1315 __clear_bit(grp->index, &q->bitmaps[IB]);
1316 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
1317 q->in_serv_agg == NULL)
1318 q->V = roundedS;
1319
1320 grp->S = roundedS;
1321 grp->F = roundedS + (2ULL << grp->slot_shift);
1322 s = qfq_calc_state(q, grp);
1323 __set_bit(grp->index, &q->bitmaps[s]);
1324
1325 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
1326 s, q->bitmaps[s],
1327 (unsigned long long) agg->S,
1328 (unsigned long long) agg->F,
1329 (unsigned long long) q->V);
1330
1331skip_update:
1332 qfq_slot_insert(grp, agg, roundedS);
1333}
1334
1335
1336
1337static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
1338 enum update_reason reason)
1339{
1340 agg->initial_budget = agg->budget = agg->budgetmax;
1341
1342 qfq_update_agg_ts(q, agg, reason);
1343 if (q->in_serv_agg == NULL) {
1344 q->in_serv_agg = agg;
1345
1346 q->oldV = q->V = agg->S;
1347 } else if (agg != q->in_serv_agg)
1348 qfq_schedule_agg(q, agg);
1349}
1350
1351static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
1352 struct qfq_aggregate *agg)
1353{
1354 unsigned int i, offset;
1355 u64 roundedS;
1356
1357 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1358 offset = (roundedS - grp->S) >> grp->slot_shift;
1359
1360 i = (grp->front + offset) % QFQ_MAX_SLOTS;
1361
1362 hlist_del(&agg->next);
1363 if (hlist_empty(&grp->slots[i]))
1364 __clear_bit(offset, &grp->full_slots);
1365}
1366
1367
1368
1369
1370
1371
1372
1373
1374static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
1375{
1376 struct qfq_group *grp = agg->grp;
1377 unsigned long mask;
1378 u64 roundedS;
1379 int s;
1380
1381 if (agg == q->in_serv_agg) {
1382 charge_actual_service(agg);
1383 q->in_serv_agg = qfq_choose_next_agg(q);
1384 return;
1385 }
1386
1387 agg->F = agg->S;
1388 qfq_slot_remove(q, grp, agg);
1389
1390 if (!grp->full_slots) {
1391 __clear_bit(grp->index, &q->bitmaps[IR]);
1392 __clear_bit(grp->index, &q->bitmaps[EB]);
1393 __clear_bit(grp->index, &q->bitmaps[IB]);
1394
1395 if (test_bit(grp->index, &q->bitmaps[ER]) &&
1396 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
1397 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
1398 if (mask)
1399 mask = ~((1UL << __fls(mask)) - 1);
1400 else
1401 mask = ~0UL;
1402 qfq_move_groups(q, mask, EB, ER);
1403 qfq_move_groups(q, mask, IB, IR);
1404 }
1405 __clear_bit(grp->index, &q->bitmaps[ER]);
1406 } else if (hlist_empty(&grp->slots[grp->front])) {
1407 agg = qfq_slot_scan(grp);
1408 roundedS = qfq_round_down(agg->S, grp->slot_shift);
1409 if (grp->S != roundedS) {
1410 __clear_bit(grp->index, &q->bitmaps[ER]);
1411 __clear_bit(grp->index, &q->bitmaps[IR]);
1412 __clear_bit(grp->index, &q->bitmaps[EB]);
1413 __clear_bit(grp->index, &q->bitmaps[IB]);
1414 grp->S = roundedS;
1415 grp->F = roundedS + (2ULL << grp->slot_shift);
1416 s = qfq_calc_state(q, grp);
1417 __set_bit(grp->index, &q->bitmaps[s]);
1418 }
1419 }
1420}
1421
1422static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1423{
1424 struct qfq_sched *q = qdisc_priv(sch);
1425 struct qfq_class *cl = (struct qfq_class *)arg;
1426
1427 if (cl->qdisc->q.qlen == 0)
1428 qfq_deactivate_class(q, cl);
1429}
1430
1431static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1432{
1433 struct qfq_sched *q = qdisc_priv(sch);
1434 struct qfq_group *grp;
1435 int i, j, err;
1436 u32 max_cl_shift, maxbudg_shift, max_classes;
1437
1438 err = qdisc_class_hash_init(&q->clhash);
1439 if (err < 0)
1440 return err;
1441
1442 if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
1443 max_classes = QFQ_MAX_AGG_CLASSES;
1444 else
1445 max_classes = qdisc_dev(sch)->tx_queue_len + 1;
1446
1447 max_cl_shift = __fls(max_classes);
1448 q->max_agg_classes = 1<<max_cl_shift;
1449
1450
1451 maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
1452 q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
1453
1454 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1455 grp = &q->groups[i];
1456 grp->index = i;
1457 grp->slot_shift = q->min_slot_shift + i;
1458 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1459 INIT_HLIST_HEAD(&grp->slots[j]);
1460 }
1461
1462 INIT_HLIST_HEAD(&q->nonfull_aggs);
1463
1464 return 0;
1465}
1466
1467static void qfq_reset_qdisc(struct Qdisc *sch)
1468{
1469 struct qfq_sched *q = qdisc_priv(sch);
1470 struct qfq_class *cl;
1471 unsigned int i;
1472
1473 for (i = 0; i < q->clhash.hashsize; i++) {
1474 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1475 if (cl->qdisc->q.qlen > 0)
1476 qfq_deactivate_class(q, cl);
1477
1478 qdisc_reset(cl->qdisc);
1479 }
1480 }
1481 sch->qstats.backlog = 0;
1482 sch->q.qlen = 0;
1483}
1484
1485static void qfq_destroy_qdisc(struct Qdisc *sch)
1486{
1487 struct qfq_sched *q = qdisc_priv(sch);
1488 struct qfq_class *cl;
1489 struct hlist_node *next;
1490 unsigned int i;
1491
1492 tcf_destroy_chain(&q->filter_list);
1493
1494 for (i = 0; i < q->clhash.hashsize; i++) {
1495 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1496 common.hnode) {
1497 qfq_destroy_class(sch, cl);
1498 }
1499 }
1500 qdisc_class_hash_destroy(&q->clhash);
1501}
1502
1503static const struct Qdisc_class_ops qfq_class_ops = {
1504 .change = qfq_change_class,
1505 .delete = qfq_delete_class,
1506 .get = qfq_get_class,
1507 .put = qfq_put_class,
1508 .tcf_chain = qfq_tcf_chain,
1509 .bind_tcf = qfq_bind_tcf,
1510 .unbind_tcf = qfq_unbind_tcf,
1511 .graft = qfq_graft_class,
1512 .leaf = qfq_class_leaf,
1513 .qlen_notify = qfq_qlen_notify,
1514 .dump = qfq_dump_class,
1515 .dump_stats = qfq_dump_class_stats,
1516 .walk = qfq_walk,
1517};
1518
1519static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1520 .cl_ops = &qfq_class_ops,
1521 .id = "qfq",
1522 .priv_size = sizeof(struct qfq_sched),
1523 .enqueue = qfq_enqueue,
1524 .dequeue = qfq_dequeue,
1525 .peek = qdisc_peek_dequeued,
1526 .init = qfq_init_qdisc,
1527 .reset = qfq_reset_qdisc,
1528 .destroy = qfq_destroy_qdisc,
1529 .owner = THIS_MODULE,
1530};
1531
1532static int __init qfq_init(void)
1533{
1534 return register_qdisc(&qfq_qdisc_ops);
1535}
1536
1537static void __exit qfq_exit(void)
1538{
1539 unregister_qdisc(&qfq_qdisc_ops);
1540}
1541
1542module_init(qfq_init);
1543module_exit(qfq_exit);
1544MODULE_LICENSE("GPL");
1545