1
2
3
4
5
6
7
8
9
10
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/bitops.h>
15#include <linux/errno.h>
16#include <linux/netdevice.h>
17#include <linux/pkt_sched.h>
18#include <net/sch_generic.h>
19#include <net/pkt_sched.h>
20#include <net/pkt_cls.h>
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97#define QFQ_MAX_SLOTS 32
98
99
100
101
102
103
104
105
106
107
108#define QFQ_MAX_INDEX 24
109#define QFQ_MAX_WSHIFT 10
110
111#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
112#define QFQ_MAX_WSUM (64*QFQ_MAX_WEIGHT)
113
114#define FRAC_BITS 30
115#define ONE_FP (1UL << FRAC_BITS)
116#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
117
118#define QFQ_MTU_SHIFT 16
119#define QFQ_MIN_LMAX 512
120
121#define QFQ_MAX_AGG_CLASSES 8
122
123
124
125
126
/* Group states, used as indexes into qfq_sched->bitmaps:
 * Eligible/Ineligible crossed with Ready/Blocked (ER, IR, EB, IB).
 */
enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
128
129struct qfq_group;
130
131struct qfq_aggregate;
132
struct qfq_class {
	struct Qdisc_class_common common;

	unsigned int refcnt;		/* usage count, see qfq_get/put_class */
	unsigned int filter_cnt;	/* tc filters bound to this class */

	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_queue qstats;
	struct gnet_stats_rate_est rate_est;
	struct Qdisc *qdisc;		/* leaf qdisc holding the packets */
	struct list_head alist;		/* link in agg->active while backlogged */
	struct qfq_aggregate *agg;	/* parent aggregate */
	int deficit;			/* DRR deficit counter within the agg */
};
147
struct qfq_aggregate {
	struct hlist_node next;	/* link in the group's slot list */
	u64 S, F;		/* virtual start and finish timestamps */

	/* Group this aggregate currently belongs to; assigned once in
	 * qfq_update_agg() from qfq_calc_index().
	 */
	struct qfq_group *grp;

	/* These fields are copies of the ones of the classes the
	 * aggregate holds (all classes in an aggregate share them).
	 */
	u32 class_weight; /* weight of each class in this aggregate */
	/* Max packet size for the classes in this aggregate. */
	int lmax;

	u32 inv_w;		/* ONE_FP / (aggregate weight) */
	u32 budgetmax;		/* max budget = num_classes * lmax */
	u32 initial_budget, budget; /* budget at service start / remaining */

	int num_classes;	/* classes currently in the aggregate */
	struct list_head active; /* backlogged classes, DRR order */

	struct hlist_node nonfull_next;	/* link in q->nonfull_aggs */
};
172
struct qfq_group {
	u64 S, F;			/* group timestamps (approx) */
	unsigned int slot_shift;	/* slot size = 1 << slot_shift */
	unsigned int index;		/* group index in q->groups */
	unsigned int front;		/* index of the front slot */
	unsigned long full_slots;	/* bit i set iff slot i is non-empty,
					 * relative to front */

	/* Array of RR lists of active aggregates. */
	struct hlist_head slots[QFQ_MAX_SLOTS];
};
183
struct qfq_sched {
	struct tcf_proto *filter_list;	/* root filter chain */
	struct Qdisc_class_hash clhash;	/* classid -> qfq_class hash */

	u64 oldV, V;			/* previous and current virtual time */
	struct qfq_aggregate *in_serv_agg; /* aggregate being served, or NULL */
	u32 num_active_agg;		/* NOTE(review): not updated anywhere in
					 * this file — verify before relying on it */
	u32 wsum;			/* sum of weights of all aggregates */

	unsigned long bitmaps[QFQ_MAX_STATE];	    /* per-state group bitmaps */
	struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* the groups */
	u32 min_slot_shift;		/* slot_shift of group 0 */

	u32 max_agg_classes;		/* max #classes per aggregate */
	struct hlist_head nonfull_aggs;	/* aggs with room for more classes */
};
200
201
202
203
204
205
206
207
/* Why an aggregate's timestamps are being updated: enqueue -> the
 * aggregate has just become active; requeue -> it exhausted its budget
 * while still backlogged and is being scheduled again.
 */
enum update_reason {enqueue, requeue};
209
210static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
211{
212 struct qfq_sched *q = qdisc_priv(sch);
213 struct Qdisc_class_common *clc;
214
215 clc = qdisc_class_find(&q->clhash, classid);
216 if (clc == NULL)
217 return NULL;
218 return container_of(clc, struct qfq_class, common);
219}
220
221static void qfq_purge_queue(struct qfq_class *cl)
222{
223 unsigned int len = cl->qdisc->q.qlen;
224
225 qdisc_reset(cl->qdisc);
226 qdisc_tree_decrease_qlen(cl->qdisc, len);
227}
228
/* Netlink attribute policy for the TCA_QFQ_* class parameters. */
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
	[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
	[TCA_QFQ_LMAX] = { .type = NLA_U32 },
};
233
234
235
236
237
238
/*
 * Calculate the group index for an aggregate with the given inverse
 * weight and maximum burst (maxlen): essentially the position of the
 * highest bit of the maximum slot size (maxlen * inv_w), measured
 * relative to min_slot_shift, rounded down to the previous group when
 * the slot size is an exact power of two.
 */
static int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
{
	u64 slot_size = (u64)maxlen * inv_w;
	unsigned long size_map;
	int index = 0;

	size_map = slot_size >> min_slot_shift;
	if (!size_map)
		goto out;

	index = __fls(size_map) + 1;	/* basically a log_2() */
	/* if slot_size is an exact power of two, step back one group */
	index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));

	if (index < 0)
		index = 0;
out:
	pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
		 (unsigned long) ONE_FP/inv_w, maxlen, index);

	return index;
}
260
261static void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
262static void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
263 enum update_reason);
264
265static void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
266 u32 lmax, u32 weight)
267{
268 INIT_LIST_HEAD(&agg->active);
269 hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
270
271 agg->lmax = lmax;
272 agg->class_weight = weight;
273}
274
/* Find, among the aggregates that still have room for more classes,
 * one with exactly the given lmax and per-class weight; NULL if none.
 */
static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
					  u32 lmax, u32 weight)
{
	struct qfq_aggregate *agg;

	hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
		if (agg->lmax == lmax && agg->class_weight == weight)
			return agg;

	return NULL;
}
286
287
288
/* Update aggregate parameters (budgetmax, inv_w, group, scheduler
 * weight sum) as a consequence of the number of its classes changing
 * to @new_num_classes.  Also maintains membership of the nonfull list.
 */
static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
			   int new_num_classes)
{
	u32 new_agg_weight;

	/* aggregate just became full: it can take no further classes */
	if (new_num_classes == q->max_agg_classes)
		hlist_del_init(&agg->nonfull_next);

	/* aggregate was full and is losing a class: it has room again */
	if (agg->num_classes > new_num_classes &&
	    new_num_classes == q->max_agg_classes - 1) /* agg no more full */
		hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);

	/* The next assignment may let
	 * agg->initial_budget > agg->budgetmax
	 * hold; charge_actual_service() copes with that (see there).
	 */
	agg->budgetmax = new_num_classes * agg->lmax;
	new_agg_weight = agg->class_weight * new_num_classes;
	agg->inv_w = ONE_FP/new_agg_weight;

	/* assign the group only once, on first activation */
	if (agg->grp == NULL) {
		int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
				       q->min_slot_shift);
		agg->grp = &q->groups[i];
	}

	q->wsum +=
		(int) agg->class_weight * (new_num_classes - agg->num_classes);

	agg->num_classes = new_num_classes;
}
320
321
/* Add class @cl to aggregate @agg; if the class is backlogged, put it
 * on the aggregate's active list and, when it is the first active
 * class and the aggregate is not already in service, schedule the
 * aggregate for service.
 */
static void qfq_add_to_agg(struct qfq_sched *q,
			   struct qfq_aggregate *agg,
			   struct qfq_class *cl)
{
	cl->agg = agg;

	qfq_update_agg(q, agg, agg->num_classes+1);
	if (cl->qdisc->q.qlen > 0) { /* adding an active class */
		list_add_tail(&cl->alist, &agg->active);
		if (list_first_entry(&agg->active, struct qfq_class, alist) ==
		    cl && q->in_serv_agg != agg) /* agg was inactive */
			qfq_activate_agg(q, agg, enqueue); /* schedule agg */
	}
}
336
337static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
338
/* Free an aggregate whose last class is leaving.  Unlink it from the
 * nonfull list if it is there, and pick a new in-service aggregate if
 * this one was being served.
 */
static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	if (!hlist_unhashed(&agg->nonfull_next))
		hlist_del_init(&agg->nonfull_next);
	if (q->in_serv_agg == agg)
		q->in_serv_agg = qfq_choose_next_agg(q);
	kfree(agg);
}
347
348
/* Deschedule class from within its parent aggregate; if the aggregate
 * has no active class left, deactivate the aggregate as well.
 */
static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
{
	struct qfq_aggregate *agg = cl->agg;


	list_del(&cl->alist); /* remove from RR queue of the aggregate */
	if (list_empty(&agg->active)) /* agg is now inactive */
		qfq_deactivate_agg(q, agg);
}
358
359
/* Remove class from its parent aggregate, destroying the aggregate if
 * the class was its only member.
 */
static void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
{
	struct qfq_aggregate *agg = cl->agg;

	cl->agg = NULL;
	if (agg->num_classes == 1) { /* agg being emptied, destroy it */
		qfq_destroy_agg(q, agg);
		return;
	}
	qfq_update_agg(q, agg, agg->num_classes-1);
}
371
372
/* Deschedule the class (if backlogged) and remove it from its aggregate. */
static void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
{
	if (cl->qdisc->q.qlen > 0) /* class is active */
		qfq_deactivate_class(q, cl);

	qfq_rm_from_agg(q, cl);
}
380
381
/* Move class to an aggregate with matching (weight, lmax), allocating
 * a new aggregate if none exists.  Called with GFP_ATOMIC because it
 * can run from the enqueue path (see qfq_enqueue()).
 */
static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
			   u32 lmax)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);

	if (new_agg == NULL) { /* create new aggregate */
		new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
		if (new_agg == NULL)
			return -ENOBUFS;
		qfq_init_agg(q, new_agg, lmax, weight);
	}
	qfq_deact_rm_from_agg(q, cl);
	qfq_add_to_agg(q, new_agg, cl);

	return 0;
}
399
/*
 * Create a new class or change the parameters (weight, lmax, rate
 * estimator) of an existing one, then (re)attach it to a matching
 * aggregate.  On success *arg holds the class and 0 is returned;
 * otherwise a negative errno is returned.
 */
static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)*arg;
	bool existing = false;
	struct nlattr *tb[TCA_QFQ_MAX + 1];
	struct qfq_aggregate *new_agg = NULL;
	u32 weight, lmax, inv_w;
	int err;
	int delta_w;

	if (tca[TCA_OPTIONS] == NULL) {
		pr_notice("qfq: no options\n");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
	if (err < 0)
		return err;

	if (tb[TCA_QFQ_WEIGHT]) {
		weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
		if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
			pr_notice("qfq: invalid weight %u\n", weight);
			return -EINVAL;
		}
	} else
		weight = 1;

	if (tb[TCA_QFQ_LMAX]) {
		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
		if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
			pr_notice("qfq: invalid max length %u\n", lmax);
			return -EINVAL;
		}
	} else
		lmax = psched_mtu(qdisc_dev(sch));

	/* Round weight down to the closest value representable through
	 * inv_w, so that stored and effective weights coincide.
	 */
	inv_w = ONE_FP / weight;
	weight = ONE_FP / inv_w;

	if (cl != NULL &&
	    lmax == cl->agg->lmax &&
	    weight == cl->agg->class_weight)
		return 0; /* nothing to change */

	delta_w = weight - (cl ? cl->agg->class_weight : 0);

	if (q->wsum + delta_w > QFQ_MAX_WSUM) {
		pr_notice("qfq: total weight out of range (%d + %u)\n",
			  delta_w, q->wsum);
		return -EINVAL;
	}

	if (cl != NULL) { /* modify existing class */
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
						    qdisc_root_sleeping_lock(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		existing = true;
		goto set_change_agg;
	}

	/* create and init new class */
	cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
	if (cl == NULL)
		return -ENOBUFS;

	cl->refcnt = 1;
	cl->common.classid = classid;
	cl->deficit = lmax;

	cl->qdisc = qdisc_create_dflt(sch->dev_queue,
				      &pfifo_qdisc_ops, classid);
	if (cl->qdisc == NULL)
		cl->qdisc = &noop_qdisc;

	if (tca[TCA_RATE]) {
		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
					qdisc_root_sleeping_lock(sch),
					tca[TCA_RATE]);
		if (err)
			goto destroy_class;
	}

	sch_tree_lock(sch);
	qdisc_class_hash_insert(&q->clhash, &cl->common);
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);

set_change_agg:
	sch_tree_lock(sch);
	new_agg = qfq_find_agg(q, lmax, weight);
	if (new_agg == NULL) { /* create new aggregate */
		/* drop the lock: kzalloc(GFP_KERNEL) may sleep */
		sch_tree_unlock(sch);
		new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
		if (new_agg == NULL) {
			err = -ENOBUFS;
			gen_kill_estimator(&cl->bstats, &cl->rate_est);
			goto destroy_class;
		}
		sch_tree_lock(sch);
		qfq_init_agg(q, new_agg, lmax, weight);
	}
	if (existing)
		qfq_deact_rm_from_agg(q, cl);
	qfq_add_to_agg(q, new_agg, cl);
	sch_tree_unlock(sch);

	*arg = (unsigned long)cl;
	return 0;

destroy_class:
	qdisc_destroy(cl->qdisc);
	kfree(cl);
	return err;
}
522
/* Final teardown of a class: detach from its aggregate, kill the rate
 * estimator, destroy the leaf qdisc and free the memory.
 */
static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
	struct qfq_sched *q = qdisc_priv(sch);

	qfq_rm_from_agg(q, cl);
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_destroy(cl->qdisc);
	kfree(cl);
}
532
/* Handle a "tc class delete" request.  Refuses while filters still
 * point at the class; actual destruction happens later via
 * qfq_put_class() when the last reference is dropped.
 */
static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (cl->filter_cnt > 0)
		return -EBUSY;

	sch_tree_lock(sch);

	qfq_purge_queue(cl);
	qdisc_class_hash_remove(&q->clhash, &cl->common);

	/* The caller (tc_ctl_tclass) holds a reference via cops->get(),
	 * so the count cannot reach zero here; cops->put() destroys.
	 */
	BUG_ON(--cl->refcnt == 0);




	sch_tree_unlock(sch);
	return 0;
}
555
556static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
557{
558 struct qfq_class *cl = qfq_find_class(sch, classid);
559
560 if (cl != NULL)
561 cl->refcnt++;
562
563 return (unsigned long)cl;
564}
565
566static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
567{
568 struct qfq_class *cl = (struct qfq_class *)arg;
569
570 if (--cl->refcnt == 0)
571 qfq_destroy_class(sch, cl);
572}
573
574static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
575{
576 struct qfq_sched *q = qdisc_priv(sch);
577
578 if (cl)
579 return NULL;
580
581 return &q->filter_list;
582}
583
584static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
585 u32 classid)
586{
587 struct qfq_class *cl = qfq_find_class(sch, classid);
588
589 if (cl != NULL)
590 cl->filter_cnt++;
591
592 return (unsigned long)cl;
593}
594
/* A filter stopped referring to this class: release its binding count. */
static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	cl->filter_cnt--;
}
601
/* Replace the class' leaf qdisc with @new (a fresh pfifo, or noop on
 * allocation failure, when @new is NULL), returning the old one in *old.
 */
static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
			   struct Qdisc *new, struct Qdisc **old)
{
	struct qfq_class *cl = (struct qfq_class *)arg;

	if (new == NULL) {
		new = qdisc_create_dflt(sch->dev_queue,
					&pfifo_qdisc_ops, cl->common.classid);
		if (new == NULL)
			new = &noop_qdisc;
	}

	sch_tree_lock(sch);
	qfq_purge_queue(cl); /* also deactivates the class if backlogged */
	*old = cl->qdisc;
	cl->qdisc = new;
	sch_tree_unlock(sch);
	return 0;
}
621
622static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
623{
624 struct qfq_class *cl = (struct qfq_class *)arg;
625
626 return cl->qdisc;
627}
628
/* Dump class configuration (weight, lmax) to a netlink message. */
static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct nlattr *nest;

	tcm->tcm_parent	= TC_H_ROOT;
	tcm->tcm_handle	= cl->common.classid;
	tcm->tcm_info	= cl->qdisc->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
	    nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
651
/* Dump class statistics, including the QFQ-specific xstats
 * (effective weight and lmax of the parent aggregate).
 */
static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
				struct gnet_dump *d)
{
	struct qfq_class *cl = (struct qfq_class *)arg;
	struct tc_qfq_stats xstats;

	memset(&xstats, 0, sizeof(xstats));
	cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;

	xstats.weight = cl->agg->class_weight;
	xstats.lmax = cl->agg->lmax;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
671
/* Iterate over all classes, invoking arg->fn on each; honours the
 * standard walker skip/count/stop protocol.
 */
static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
695
/* Map an skb to a qfq class: first try skb->priority as a direct
 * classid, then run the tc filter chain.  Returns NULL (with *qerr
 * set) when the packet should be dropped or was stolen by an action.
 */
static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
		pr_debug("qfq_classify: found %d\n", skb->priority);
		cl = qfq_find_class(sch, skb->priority);
		if (cl != NULL)
			return cl;
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through: all three actions yield NULL */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (struct qfq_class *)res.class;
		if (cl == NULL)
			cl = qfq_find_class(sch, res.classid);
		return cl;
	}

	return NULL;
}
731
732
733static inline int qfq_gt(u64 a, u64 b)
734{
735 return (s64)(a - b) > 0;
736}
737
738
739static inline u64 qfq_round_down(u64 ts, unsigned int shift)
740{
741 return ts & ~((1ULL << shift) - 1);
742}
743
744
745static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
746 unsigned long bitmap)
747{
748 int index = __ffs(bitmap);
749 return &q->groups[index];
750}
751
/* Keep only the bits of @bitmap at position @from and above. */
static inline unsigned long mask_from(unsigned long bitmap, int from)
{
	unsigned long below_from = (1UL << from) - 1;

	return bitmap & ~below_from;
}
756
757
758
759
760
761
/*
 * Calculate the state (ER/IR/EB/IB) a group should be in:
 * ineligible if its start time is after V, and blocked if a
 * lower-index group in ER has a smaller finish time.
 */
static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
{
	/* if S > V we are not eligible */
	unsigned int state = qfq_gt(grp->S, q->V);
	unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (qfq_gt(grp->F, next->F))
			state |= EB;
	}

	return state;
}
777
778
779
780
781
782
783
784
785static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
786 int src, int dst)
787{
788 q->bitmaps[dst] |= q->bitmaps[src] & mask;
789 q->bitmaps[src] &= ~mask;
790}
791
/* Unblock the groups below @index that were blocked by the finish
 * time @old_F of the group that just left ER, unless a remaining ER
 * group still blocks them.
 */
static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
{
	unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
	struct qfq_group *next;

	if (mask) {
		next = qfq_ffs(q, mask);
		if (!qfq_gt(next->F, old_F))
			return; /* still blocked by a lower finish time */
	}

	mask = (1UL << index) - 1;
	qfq_move_groups(q, mask, EB, ER);
	qfq_move_groups(q, mask, IB, IR);
}
807
808
809
810
811
812
813
814
815
816
817
818static void qfq_make_eligible(struct qfq_sched *q)
819{
820 unsigned long vslot = q->V >> q->min_slot_shift;
821 unsigned long old_vslot = q->oldV >> q->min_slot_shift;
822
823 if (vslot != old_vslot) {
824 unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1;
825 qfq_move_groups(q, mask, IR, ER);
826 qfq_move_groups(q, mask, IB, EB);
827 }
828}
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
/*
 * Insert an aggregate into its group's slot list, at the slot given
 * by the distance between roundedS and the group's start time.  If
 * the slot would fall outside the bucket array, clamp it to the last
 * usable slot (QFQ_MAX_SLOTS - 2) and pull the aggregate's
 * timestamps back accordingly.
 */
static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
			    u64 roundedS)
{
	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
	unsigned int i; /* slot index in the bucket ring */

	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
		u64 deltaS = roundedS - grp->S -
			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
		agg->S -= deltaS;
		agg->F -= deltaS;
		slot = QFQ_MAX_SLOTS - 2;
	}

	i = (grp->front + slot) % QFQ_MAX_SLOTS;

	hlist_add_head(&agg->next, &grp->slots[i]);
	__set_bit(slot, &grp->full_slots);
}
882
883
/* Maybe introduce hlist_first_entry?? Return the first aggregate in
 * the group's front slot (caller must ensure the slot is non-empty).
 */
static struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
{
	return hlist_entry(grp->slots[grp->front].first,
			   struct qfq_aggregate, next);
}
889
890
891
892
/* Remove the aggregate at the head of the group's front slot,
 * clearing the slot's full bit if it becomes empty.
 */
static void qfq_front_slot_remove(struct qfq_group *grp)
{
	struct qfq_aggregate *agg = qfq_slot_head(grp);

	BUG_ON(!agg);
	hlist_del(&agg->next);
	if (hlist_empty(&grp->slots[grp->front]))
		__clear_bit(0, &grp->full_slots);
}
902
903
904
905
906
907
/*
 * Find the first non-empty slot of the group, move the front pointer
 * to it, and return the first aggregate stored there; NULL if the
 * whole group is empty.
 */
static struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
{
	unsigned int i;

	pr_debug("qfq slot_scan: grp %u full %#lx\n",
		 grp->index, grp->full_slots);

	if (grp->full_slots == 0)
		return NULL;

	i = __ffs(grp->full_slots);  /* zero based */
	if (i > 0) {
		grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
		grp->full_slots >>= i;
	}

	return qfq_slot_head(grp);
}
926
927
928
929
930
931
932
933
934
935
/*
 * Shift the front of the group backwards so that the slot of
 * @roundedS becomes slot 0 (used when an aggregate with an earlier
 * start time than grp->S must be inserted).  full_slots is shifted
 * accordingly; the caller guarantees the shift stays within range.
 */
static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
{
	unsigned int i = (grp->S - roundedS) >> grp->slot_shift;

	grp->full_slots <<= i;
	grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
}
943
/* If needed, jump V forward to the start time of the first ineligible
 * group (only when no group is in ER), then promote the ineligible
 * groups reached by V to the eligible states.
 */
static void qfq_update_eligible(struct qfq_sched *q)
{
	struct qfq_group *grp;
	unsigned long ineligible;

	ineligible = q->bitmaps[IR] | q->bitmaps[IB];
	if (ineligible) {
		if (!q->bitmaps[ER]) {
			grp = qfq_ffs(q, ineligible);
			if (qfq_gt(grp->S, q->V))
				q->V = grp->S;
		}
		qfq_make_eligible(q);
	}
}
959
960
/* Dequeue the (already peeked) head packet of @cl and update the DRR
 * state of the class within its aggregate: drop the class from the
 * active list when emptied, or rotate it to the tail (replenishing
 * its deficit) when it cannot afford its next packet.
 */
static void agg_dequeue(struct qfq_aggregate *agg,
			struct qfq_class *cl, unsigned int len)
{
	qdisc_dequeue_peeked(cl->qdisc);

	cl->deficit -= (int) len;

	if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
		list_del(&cl->alist);
	else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
		cl->deficit += agg->lmax;
		list_move_tail(&cl->alist, &agg->active);
	}
}
975
/* Peek at the head packet of the aggregate's first active class,
 * returning the class in *cl and the packet length in *len.  A NULL
 * skb here means a non-work-conserving leaf qdisc (active class with
 * nothing to give), which is flagged once.
 */
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
					   struct qfq_class **cl,
					   unsigned int *len)
{
	struct sk_buff *skb;

	*cl = list_first_entry(&agg->active, struct qfq_class, alist);
	skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
	if (skb == NULL)
		WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
	else
		*len = qdisc_pkt_len(skb);

	return skb;
}
991
992
/* Update F according to the actual service received by the aggregate,
 * i.e. the budget actually consumed since service started.  The
 * min() with budgetmax guards against initial_budget - budget
 * exceeding budgetmax after budgetmax was lowered (e.g. when the
 * aggregate lost classes while in service).
 */
static inline void charge_actual_service(struct qfq_aggregate *agg)
{

	u32 service_received = min(agg->budgetmax,
				   agg->initial_budget - agg->budget);

	agg->F = agg->S + (u64)service_received * agg->inv_w;
}
1005
1006static inline void qfq_update_agg_ts(struct qfq_sched *q,
1007 struct qfq_aggregate *agg,
1008 enum update_reason reason);
1009
1010static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
1011
/*
 * Dequeue the next packet from the aggregate currently in service,
 * switching to (and recharging) the next aggregate when the current
 * one has no active class left or not enough budget for its head
 * packet.  Advances the system virtual time V by len * IWSUM.
 */
static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
	struct qfq_class *cl;
	struct sk_buff *skb = NULL;
	/* next-packet len, 0 means no more active classes in in-service agg */
	unsigned int len = 0;

	if (in_serv_agg == NULL)
		return NULL;

	if (!list_empty(&in_serv_agg->active))
		skb = qfq_peek_skb(in_serv_agg, &cl, &len);

	/*
	 * If there are no active classes in the in-service aggregate,
	 * or if the aggregate has not enough budget to serve its next
	 * packet, then switch to the next aggregate to serve.
	 */
	if (len == 0 || in_serv_agg->budget < len) {
		charge_actual_service(in_serv_agg);

		/* recharge the budget of the aggregate */
		in_serv_agg->initial_budget = in_serv_agg->budget =
			in_serv_agg->budgetmax;

		if (!list_empty(&in_serv_agg->active)) {
			/*
			 * Still active: reschedule it for service with
			 * requeue timestamps (S jumps to the old F).
			 */
			qfq_update_agg_ts(q, in_serv_agg, requeue);
			qfq_schedule_agg(q, in_serv_agg);
		} else if (sch->q.qlen == 0) { /* no aggregate to serve */
			q->in_serv_agg = NULL;
			return NULL;
		}

		/*
		 * If we get here, there are other aggregates queued:
		 * choose the new aggregate to serve.
		 */
		in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
		skb = qfq_peek_skb(in_serv_agg, &cl, &len);
	}
	if (!skb)
		return NULL;

	sch->q.qlen--;
	qdisc_bstats_update(sch, skb);

	agg_dequeue(in_serv_agg, cl, len);
	/* If lmax is lowered, through qfq_change_class, for a class
	 * owning pending packets larger than the new lmax, then the
	 * following condition may hold; clamp instead of wrapping.
	 */
	if (unlikely(in_serv_agg->budget < len))
		in_serv_agg->budget = 0;
	else
		in_serv_agg->budget -= len;

	q->V += (u64)len * IWSUM;
	pr_debug("qfq dequeue: len %u F %lld now %lld\n",
		 len, (unsigned long long) in_serv_agg->F,
		 (unsigned long long) q->V);

	return skb;
}
1087
/*
 * Extract the next aggregate to serve: the head of the lowest-index
 * eligible-ready group.  Updates the group's state and timestamps
 * according to its new front aggregate, and unblocks lower groups
 * that were blocked by the old finish time.  Returns NULL if no
 * group is in ER.
 */
static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
{
	struct qfq_group *grp;
	struct qfq_aggregate *agg, *new_front_agg;
	u64 old_F;

	qfq_update_eligible(q);
	q->oldV = q->V;

	if (!q->bitmaps[ER])
		return NULL;

	grp = qfq_ffs(q, q->bitmaps[ER]);
	old_F = grp->F;

	agg = qfq_slot_head(grp);

	/* agg starts to be served, remove it from schedule */
	qfq_front_slot_remove(grp);

	new_front_agg = qfq_slot_scan(grp);

	if (new_front_agg == NULL) /* group is now inactive, remove from ER */
		__clear_bit(grp->index, &q->bitmaps[ER]);
	else {
		u64 roundedS = qfq_round_down(new_front_agg->S,
					      grp->slot_shift);
		unsigned int s;

		if (grp->S == roundedS)
			return agg; /* timestamps unchanged, state too */
		grp->S = roundedS;
		grp->F = roundedS + (2ULL << grp->slot_shift);
		__clear_bit(grp->index, &q->bitmaps[ER]);
		s = qfq_calc_state(q, grp);
		__set_bit(grp->index, &q->bitmaps[s]);
	}

	qfq_unblock_groups(q, grp->index, old_F);

	return agg;
}
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
/*
 * Assign a start time to a newly activated aggregate.  Normally
 * S = max(F, V) (the "else" branch keeps S = F when F > V), but when
 * the old finish time is too far in the past (or too far ahead of
 * the allowed limit) S is clamped against the finish time of the
 * first lower-index ER group, or against V itself.
 */
static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	unsigned long mask;
	u64 limit, roundedF;
	int slot_shift = agg->grp->slot_shift;

	roundedF = qfq_round_down(agg->F, slot_shift);
	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);

	if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
		/* timestamp was stale */
		mask = mask_from(q->bitmaps[ER], agg->grp->index);
		if (mask) {
			struct qfq_group *next = qfq_ffs(q, mask);
			if (qfq_gt(roundedF, next->F)) {
				if (qfq_gt(limit, next->F))
					agg->S = next->F;
				else /* preserve timestamp correctness */
					agg->S = limit;
				return;
			}
		}
		agg->S = q->V;
	} else  /* timestamp is not stale */
		agg->S = agg->F;
}
1170
1171
1172
1173
1174
1175
1176
/*
 * Update the timestamps of @agg before scheduling it: on enqueue the
 * start time comes from qfq_update_start(); on requeue (budget
 * exhausted but still backlogged) service simply continues, so the
 * new S is the old F.  F is then S plus the time to consume a full
 * budget at the aggregate's rate.
 */
static inline void
qfq_update_agg_ts(struct qfq_sched *q,
		    struct qfq_aggregate *agg, enum update_reason reason)
{
	if (reason != requeue)
		qfq_update_start(q, agg);
	else /* just charge agg for the service received */
		agg->S = agg->F;

	agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
}
1188
1189static void qfq_schedule_agg(struct qfq_sched *, struct qfq_aggregate *);
1190
/*
 * Enqueue a packet: classify it, grow the class' aggregate lmax if the
 * packet is larger than the current one, push the packet into the leaf
 * qdisc, and activate class/aggregate as needed.
 */
static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct qfq_aggregate *agg;
	int err = 0;

	cl = qfq_classify(skb, sch, &err);
	if (cl == NULL) {
		if (err & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return err;
	}
	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);

	if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) {
		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
			 cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid);
		/* move the class to an aggregate with a large-enough lmax */
		err = qfq_change_agg(sch, cl, cl->agg->class_weight,
				     qdisc_pkt_len(skb));
		if (err)
			return err;
	}

	err = qdisc_enqueue(skb, cl->qdisc);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			sch->qstats.drops++;
		}
		return err;
	}

	bstats_update(&cl->bstats, skb);
	++sch->q.qlen;

	agg = cl->agg;
	/* if the queue was not empty, then done here */
	if (cl->qdisc->q.qlen != 1) {
		/* head requeue case: if the new head cannot be served
		 * with the remaining deficit, rotate the class to the
		 * tail of the aggregate's active list.
		 */
		if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
		    list_first_entry(&agg->active, struct qfq_class, alist)
		    == cl && cl->deficit < qdisc_pkt_len(skb))
			list_move_tail(&cl->alist, &agg->active);

		return err;
	}

	/* schedule class for service within the aggregate */
	cl->deficit = agg->lmax;
	list_add_tail(&cl->alist, &agg->active);

	if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
	    q->in_serv_agg == agg)
		return err; /* non-empty or in service, nothing else to do */

	qfq_activate_agg(q, agg, enqueue);

	return err;
}
1252
1253
1254
1255
/*
 * Schedule an aggregate for service within its group: insert it into
 * the proper slot and, unless another scheduled aggregate already
 * defines the group's timestamps/state, recompute the group's S, F
 * and state bitmap membership.
 */
static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	struct qfq_group *grp = agg->grp;
	u64 roundedS;
	int s;

	roundedS = qfq_round_down(agg->S, grp->slot_shift);

	/*
	 * Insert agg in the correct bucket.
	 * If agg->S >= grp->S we don't need to adjust the bucket list
	 * and simply go to the insertion phase.  Otherwise grp->S is
	 * decreasing, we must make room in the bucket list (rotate),
	 * and also recompute the group state.
	 */
	if (grp->full_slots) {
		if (!qfq_gt(grp->S, agg->S))
			goto skip_update;

		/* create a slot for this agg->S */
		qfq_slot_rotate(grp, roundedS);
		/* group was surely ineligible, remove */
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[IB]);
	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
		   q->in_serv_agg == NULL)
		q->V = roundedS;

	grp->S = roundedS;
	grp->F = roundedS + (2ULL << grp->slot_shift);
	s = qfq_calc_state(q, grp);
	__set_bit(grp->index, &q->bitmaps[s]);

	pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
		 s, q->bitmaps[s],
		 (unsigned long long) agg->S,
		 (unsigned long long) agg->F,
		 (unsigned long long) q->V);

skip_update:
	qfq_slot_insert(grp, agg, roundedS);
}
1300
1301
1302
/* Activate an aggregate: recharge its budget, update its timestamps,
 * and either put it directly in service (if nothing is being served)
 * or schedule it within its group.
 */
static void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
			     enum update_reason reason)
{
	agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. */

	qfq_update_agg_ts(q, agg, reason);
	if (q->in_serv_agg == NULL) { /* no aggregate being served */
		q->in_serv_agg = agg; /* start serving this aggregate */
		/* reset virtual time to the start of this aggregate */
		q->oldV = q->V = agg->S;
	} else if (agg != q->in_serv_agg)
		qfq_schedule_agg(q, agg);
}
1316
/* Remove an aggregate from its group's slot list, clearing the slot's
 * full bit if the slot becomes empty.
 */
static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
			    struct qfq_aggregate *agg)
{
	unsigned int i, offset;
	u64 roundedS;

	roundedS = qfq_round_down(agg->S, grp->slot_shift);
	offset = (roundedS - grp->S) >> grp->slot_shift;

	i = (grp->front + offset) % QFQ_MAX_SLOTS;

	hlist_del(&agg->next);
	if (hlist_empty(&grp->slots[i]))
		__clear_bit(offset, &grp->full_slots);
}
1332
1333
1334
1335
1336
1337
1338
1339
/*
 * Deactivate an aggregate that ran out of active classes.  If it is
 * the one in service, just charge it and pick a successor; otherwise
 * pull it out of its group's slots and fix up the group's state
 * bitmaps and timestamps, possibly unblocking lower groups.
 */
static void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
	struct qfq_group *grp = agg->grp;
	unsigned long mask;
	u64 roundedS;
	int s;

	if (agg == q->in_serv_agg) {
		charge_actual_service(agg);
		q->in_serv_agg = qfq_choose_next_agg(q);
		return;
	}

	agg->F = agg->S;
	qfq_slot_remove(q, grp, agg);

	if (!grp->full_slots) {
		/* group is now empty: clear it from every state bitmap */
		__clear_bit(grp->index, &q->bitmaps[IR]);
		__clear_bit(grp->index, &q->bitmaps[EB]);
		__clear_bit(grp->index, &q->bitmaps[IB]);

		/* if this was the last lowest-index ER group, unblock
		 * every group below the next remaining ER group
		 */
		if (test_bit(grp->index, &q->bitmaps[ER]) &&
		    !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
			mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
			if (mask)
				mask = ~((1UL << __fls(mask)) - 1);
			else
				mask = ~0UL;
			qfq_move_groups(q, mask, EB, ER);
			qfq_move_groups(q, mask, IB, IR);
		}
		__clear_bit(grp->index, &q->bitmaps[ER]);
	} else if (hlist_empty(&grp->slots[grp->front])) {
		/* front slot emptied: advance to the new front aggregate
		 * and recompute the group's timestamps and state
		 */
		agg = qfq_slot_scan(grp);
		roundedS = qfq_round_down(agg->S, grp->slot_shift);
		if (grp->S != roundedS) {
			__clear_bit(grp->index, &q->bitmaps[ER]);
			__clear_bit(grp->index, &q->bitmaps[IR]);
			__clear_bit(grp->index, &q->bitmaps[EB]);
			__clear_bit(grp->index, &q->bitmaps[IB]);
			grp->S = roundedS;
			grp->F = roundedS + (2ULL << grp->slot_shift);
			s = qfq_calc_state(q, grp);
			__set_bit(grp->index, &q->bitmaps[s]);
		}
	}
}
1387
1388static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1389{
1390 struct qfq_sched *q = qdisc_priv(sch);
1391 struct qfq_class *cl = (struct qfq_class *)arg;
1392
1393 if (cl->qdisc->q.qlen == 0)
1394 qfq_deactivate_class(q, cl);
1395}
1396
/* Try to drop one packet from some active class scheduled in @slot;
 * returns the dropped length, or 0 if nothing could be dropped.
 */
static unsigned int qfq_drop_from_slot(struct qfq_sched *q,
				       struct hlist_head *slot)
{
	struct qfq_aggregate *agg;
	struct qfq_class *cl;
	unsigned int len;

	hlist_for_each_entry(agg, slot, next) {
		list_for_each_entry(cl, &agg->active, alist) {

			if (!cl->qdisc->ops->drop)
				continue;

			len = cl->qdisc->ops->drop(cl->qdisc);
			if (len > 0) {
				if (cl->qdisc->q.qlen == 0)
					qfq_deactivate_class(q, cl);

				return len;
			}
		}
	}
	return 0;
}
1421
/* Qdisc_ops drop: scan every slot of every group until one packet can
 * be dropped; returns the dropped length or 0.
 */
static unsigned int qfq_drop(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	unsigned int i, j, len;

	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		grp = &q->groups[i];
		for (j = 0; j < QFQ_MAX_SLOTS; j++) {
			len = qfq_drop_from_slot(q, &grp->slots[j]);
			if (len > 0) {
				sch->q.qlen--;
				return len;
			}
		}

	}

	return 0;
}
1442
/* Initialize the qdisc: set up the class hash, derive the maximum
 * number of classes per aggregate from the device tx queue length
 * (rounded down to a power of two), compute min_slot_shift, and
 * initialize all groups and the nonfull-aggregates list.
 */
static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_group *grp;
	int i, j, err;
	u32 max_cl_shift, maxbudg_shift, max_classes;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;

	if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
		max_classes = QFQ_MAX_AGG_CLASSES;
	else
		max_classes = qdisc_dev(sch)->tx_queue_len + 1;
	/* max_classes may be rounded down to a power of two */
	max_cl_shift = __fls(max_classes);
	q->max_agg_classes = 1<<max_cl_shift;

	/* maximum budget for an aggregate: max_agg_classes * max MTU */
	maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
	q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;

	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
		grp = &q->groups[i];
		grp->index = i;
		grp->slot_shift = q->min_slot_shift + i;
		for (j = 0; j < QFQ_MAX_SLOTS; j++)
			INIT_HLIST_HEAD(&grp->slots[j]);
	}

	INIT_HLIST_HEAD(&q->nonfull_aggs);

	return 0;
}
1478
/* Reset the qdisc: deactivate every backlogged class, reset all leaf
 * qdiscs and zero the root queue length.
 */
static void qfq_reset_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->qdisc->q.qlen > 0)
				qfq_deactivate_class(q, cl);

			qdisc_reset(cl->qdisc);
		}
	}
	sch->q.qlen = 0;
}
1495
/* Destroy the qdisc: tear down the filter chain, destroy every class
 * (safe iteration, since destruction unlinks them) and free the hash.
 */
static void qfq_destroy_qdisc(struct Qdisc *sch)
{
	struct qfq_sched *q = qdisc_priv(sch);
	struct qfq_class *cl;
	struct hlist_node *next;
	unsigned int i;

	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
					  common.hnode) {
			qfq_destroy_class(sch, cl);
		}
	}
	qdisc_class_hash_destroy(&q->clhash);
}
1513
/* Class-level operations exported to the packet scheduler core. */
static const struct Qdisc_class_ops qfq_class_ops = {
	.change		= qfq_change_class,
	.delete		= qfq_delete_class,
	.get		= qfq_get_class,
	.put		= qfq_put_class,
	.tcf_chain	= qfq_tcf_chain,
	.bind_tcf	= qfq_bind_tcf,
	.unbind_tcf	= qfq_unbind_tcf,
	.graft		= qfq_graft_class,
	.leaf		= qfq_class_leaf,
	.qlen_notify	= qfq_qlen_notify,
	.dump		= qfq_dump_class,
	.dump_stats	= qfq_dump_class_stats,
	.walk		= qfq_walk,
};
1529
/* Qdisc-level operations: the "qfq" scheduling discipline. */
static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
	.cl_ops		= &qfq_class_ops,
	.id		= "qfq",
	.priv_size	= sizeof(struct qfq_sched),
	.enqueue	= qfq_enqueue,
	.dequeue	= qfq_dequeue,
	.peek		= qdisc_peek_dequeued,
	.drop		= qfq_drop,
	.init		= qfq_init_qdisc,
	.reset		= qfq_reset_qdisc,
	.destroy	= qfq_destroy_qdisc,
	.owner		= THIS_MODULE,
};
1543
/* Module init: register the qfq scheduling discipline. */
static int __init qfq_init(void)
{
	return register_qdisc(&qfq_qdisc_ops);
}
1548
/* Module exit: unregister the qfq scheduling discipline. */
static void __exit qfq_exit(void)
{
	unregister_qdisc(&qfq_qdisc_ops);
}
1553
1554module_init(qfq_init);
1555module_exit(qfq_exit);
1556MODULE_LICENSE("GPL");
1557