1
2
3
4
5
6
7
8
9
10
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/jiffies.h>
16#include <linux/string.h>
17#include <linux/in.h>
18#include <linux/errno.h>
19#include <linux/init.h>
20#include <linux/ipv6.h>
21#include <linux/skbuff.h>
22#include <linux/jhash.h>
23#include <linux/slab.h>
24#include <net/ip.h>
25#include <net/netlink.h>
26#include <net/pkt_sched.h>
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/* Implementation limits.  Depth and slot count are tied together: a
 * flow queue never holds more than SFQ_DEPTH - 1 packets, and at most
 * SFQ_SLOTS flows can be active at once.
 */
#define SFQ_DEPTH		128	/* max packets per flow + 1 */
#define SFQ_SLOTS		128	/* max number of active flows */
#define SFQ_EMPTY_SLOT		255	/* ht[] sentinel: bucket has no slot */
#define SFQ_HASH_DIVISOR	1024	/* hash buckets; power of two (sfq_fold_hash masks) */

/* The flow credit ("allot") is stored scaled to save space: one unit
 * is 2^SFQ_ALLOT_SHIFT bytes, rounded up.
 */
#define SFQ_ALLOT_SHIFT		3
#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)

/* An sfq_index names either a slot (0 .. SFQ_SLOTS - 1) or a dep[]
 * anchor (SFQ_SLOTS .. SFQ_SLOTS + SFQ_DEPTH - 1, see sfq_dep_head()),
 * plus the SFQ_EMPTY_SLOT sentinel — all of which fit in an unsigned
 * char for the 128/128 sizing above.
 */
typedef unsigned char sfq_index;
88
89
90
91
92
93
94
/* Link of a doubly linked list threaded by sfq_index values rather
 * than pointers: values < SFQ_SLOTS refer to q->slots[], values
 * >= SFQ_SLOTS refer to the static anchors in q->dep[] (resolved by
 * sfq_dep_head()).
 */
struct sfq_head
{
	sfq_index	next;
	sfq_index	prev;
};
100
/* Per-flow state.  A slot queues the packets of one active flow in an
 * anonymous double linked list; the slot itself doubles as the list
 * head via casts to struct sk_buff * — this relies on skblist_next /
 * skblist_prev lining up with sk_buff's next/prev (see
 * slot_queue_init() and the dequeue helpers).
 */
struct sfq_slot {
	struct sk_buff	*skblist_next;	/* oldest queued skb */
	struct sk_buff	*skblist_prev;	/* newest queued skb */
	sfq_index	qlen;		/* number of skbs in skblist */
	sfq_index	next;		/* next slot in round-robin ring */
	struct sfq_head	dep;		/* link in the dep[qlen] list */
	unsigned short	hash;		/* hash bucket (index into ht[]) */
	short		allot;		/* remaining credit, scaled by SFQ_ALLOT_SHIFT */
};
110
/* Qdisc private state; modified under the qdisc tree lock. */
struct sfq_sched_data
{
/* Parameters */
	int		perturb_period;	/* jiffies between rehash perturbations; 0 = never */
	unsigned	quantum;	/* bytes a flow may send per DRR round */
	int		limit;		/* max packets per flow (<= SFQ_DEPTH - 1) */

/* Variables */
	struct tcf_proto *filter_list;	/* attached classifier chain, may be NULL */
	struct timer_list perturb_timer;
	u32		perturbation;	/* key mixed into the flow hash */
	sfq_index	cur_depth;	/* qlen of the currently longest slot */
	unsigned short	scaled_quantum;	/* SFQ_ALLOT_SIZE(quantum), cached */
	struct sfq_slot	*tail;		/* current slot in the round-robin ring, NULL if idle */
	sfq_index	ht[SFQ_HASH_DIVISOR];	/* bucket -> slot index, or SFQ_EMPTY_SLOT */
	struct sfq_slot	slots[SFQ_SLOTS];
	struct sfq_head	dep[SFQ_DEPTH];	/* anchors of the "slots with qlen i" lists */
};
129
130
131
132
133static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
134{
135 if (val < SFQ_SLOTS)
136 return &q->slots[val].dep;
137 return &q->dep[val - SFQ_SLOTS];
138}
139
140static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
141{
142 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
143}
144
/*
 * sfq_hash - compute the hash bucket for a packet
 *
 * Hashes on (daddr, saddr, protocol) and, when a transport header is
 * present and pullable, XORs in the 32bit word that holds the port
 * pair, so distinct L4 flows map to distinct buckets.  IPv4 fragments
 * carry no usable ports and are hashed on addresses/protocol only.
 * Packets that are neither IPv4 nor IPv6 (or too short to pull) fall
 * back to hashing the dst entry and the originating socket pointer.
 */
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
	u32 h, h2;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
	{
		const struct iphdr *iph;
		int poff;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ip_hdr(skb);
		h = (__force u32)iph->daddr;
		h2 = (__force u32)iph->saddr ^ iph->protocol;
		/* fragmented packets: no port information */
		if (iph->frag_off & htons(IP_MF|IP_OFFSET))
			break;
		poff = proto_ports_offset(iph->protocol);
		if (poff >= 0 &&
		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
			/* pulling may have reallocated the head: reload */
			iph = ip_hdr(skb);
			h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
		}
		break;
	}
	case htons(ETH_P_IPV6):
	{
		struct ipv6hdr *iph;
		int poff;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			goto err;
		iph = ipv6_hdr(skb);
		/* hash only the low address words; enough to spread flows */
		h = (__force u32)iph->daddr.s6_addr32[3];
		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
		poff = proto_ports_offset(iph->nexthdr);
		if (poff >= 0 &&
		    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
			/* pulling may have reallocated the head: reload */
			iph = ipv6_hdr(skb);
			h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
		}
		break;
	}
	default:
err:
		h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
		h2 = (unsigned long)skb->sk;
	}

	return sfq_fold_hash(q, h, h2);
}
196
/*
 * sfq_classify - choose a hash bucket for @skb
 *
 * Returns a 1-based bucket number in [1, SFQ_HASH_DIVISOR], or 0 to
 * request a drop; in the drop case *qerr carries the NET_XMIT_*
 * verdict.  skb->priority addressed at this qdisc selects a bucket
 * directly; otherwise attached filters decide, and without filters
 * the flow hash is used.
 */
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
		return TC_H_MIN(skb->priority);

	if (!q->filter_list)
		return sfq_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
			return TC_H_MIN(res.classid);
	}
	return 0;
}
229
230
231
232
/*
 * sfq_link - insert slot @x at the head of the list of slots that
 * currently have the same queue length (q->slots[x].qlen)
 *
 * These per-depth lists let sfq_drop() find a longest flow via
 * dep[cur_depth].next, and let sfq_enqueue() grab a free slot via
 * dep[0].next.
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int qlen = q->slots[x].qlen;

	p = qlen + SFQ_SLOTS;	/* sfq_index of the dep[qlen] anchor */
	n = q->dep[qlen].next;

	q->slots[x].dep.next = n;
	q->slots[x].dep.prev = p;

	q->dep[qlen].next = x;	/* insert right after the anchor */
	sfq_dep_head(q, n)->prev = x;
}
247
/*
 * sfq_unlink - remove slot @x from its depth list
 *
 * Loads @x's neighbours into @n and @p (caller-provided lvalues, also
 * used afterwards by sfq_dec()/sfq_inc()) and splices them together.
 * Wrapped in do { } while (0) so the macro expands to a single
 * statement and cannot misbehave under an unbraced if/else.
 */
#define sfq_unlink(q, x, n, p)			\
	do {					\
		n = q->slots[x].dep.next;	\
		p = q->slots[x].dep.prev;	\
		sfq_dep_head(q, p)->next = n;	\
		sfq_dep_head(q, n)->prev = p;	\
	} while (0)
253
254
/*
 * sfq_dec - account for one packet leaving slot @x
 *
 * Unlinks @x, decrements its qlen and relinks it one depth lower.
 * cur_depth (the largest qlen of any slot) shrinks only when @x was
 * the sole slot at that depth: after sfq_unlink(), n == p means the
 * list @x just left holds nothing but its anchor.
 */
static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	d = q->slots[x].qlen--;	/* d = queue length before the decrement */
	if (n == p && q->cur_depth == d)
		q->cur_depth--;
	sfq_link(q, x);
}
267
/*
 * sfq_inc - account for one packet arriving at slot @x
 *
 * Unlinks @x, increments its qlen and relinks it one depth higher,
 * raising cur_depth when @x becomes the new longest slot.
 */
static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	d = ++q->slots[x].qlen;	/* d = queue length after the increment */
	if (q->cur_depth < d)
		q->cur_depth = d;
	sfq_link(q, x);
}
280
281
282
283
/* Remove and return the newest (tail) skb from the slot's list.
 * The slot itself is cast to act as the list head, so the remaining
 * tail element's ->next is pointed back at the slot.
 * Caller guarantees the list is non-empty.
 */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
	struct sk_buff *skb = slot->skblist_prev;

	slot->skblist_prev = skb->prev;
	skb->prev->next = (struct sk_buff *)slot;
	skb->next = skb->prev = NULL;	/* detach skb completely */
	return skb;
}
293
294
/* Remove and return the oldest (head) skb from the slot's list.
 * Mirror image of slot_dequeue_tail(); caller guarantees the list is
 * non-empty.
 */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
	struct sk_buff *skb = slot->skblist_next;

	slot->skblist_next = skb->next;
	skb->next->prev = (struct sk_buff *)slot;
	skb->next = skb->prev = NULL;	/* detach skb completely */
	return skb;
}
304
305static inline void slot_queue_init(struct sfq_slot *slot)
306{
307 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
308}
309
310
/* Append @skb at the tail (newest position) of the slot's list.
 * The skb's links are written before the list is touched, so the
 * statement order matters.
 */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
	skb->prev = slot->skblist_prev;
	skb->next = (struct sk_buff *)slot;
	slot->skblist_prev->next = skb;
	slot->skblist_prev = skb;
}
318
/* Iterate over the skbs queued in @slot with @skb as cursor; the walk
 * ends when the cursor wraps back to the slot head itself.
 */
#define slot_queue_walk(slot, skb) \
	for (skb = slot->skblist_next; \
	     skb != (struct sk_buff *)slot; \
	     skb = skb->next)
323
/*
 * sfq_drop - drop one queued packet to relieve pressure
 *
 * Preferred victim: the newest packet of a longest flow (d > 1).  If
 * every active flow holds exactly one packet (d == 1), the flow at
 * the round-robin head loses its only packet and is unlinked from
 * both the ring and the hash table.  Returns the dropped packet's
 * length, or 0 when nothing is queued.
 */
static unsigned int sfq_drop(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index x, d = q->cur_depth;
	struct sk_buff *skb;
	unsigned int len;
	struct sfq_slot *slot;

	/* Drop the tail packet of a longest slot */
	if (d > 1) {
		x = q->dep[d].next;	/* any slot with qlen == d */
		slot = &q->slots[x];
drop:
		skb = slot_dequeue_tail(slot);
		len = qdisc_pkt_len(skb);
		sfq_dec(q, x);
		kfree_skb(skb);
		sch->q.qlen--;
		sch->qstats.drops++;
		sch->qstats.backlog -= len;
		return len;
	}

	if (d == 1) {
		/* every active slot has length 1: sacrifice the flow at
		 * the round-robin head and retire it
		 */
		x = q->tail->next;
		slot = &q->slots[x];
		q->tail->next = slot->next;	/* unlink from the ring */
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		goto drop;
	}

	return 0;
}
358
359static int
360sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
361{
362 struct sfq_sched_data *q = qdisc_priv(sch);
363 unsigned int hash;
364 sfq_index x;
365 struct sfq_slot *slot;
366 int uninitialized_var(ret);
367
368 hash = sfq_classify(skb, sch, &ret);
369 if (hash == 0) {
370 if (ret & __NET_XMIT_BYPASS)
371 sch->qstats.drops++;
372 kfree_skb(skb);
373 return ret;
374 }
375 hash--;
376
377 x = q->ht[hash];
378 slot = &q->slots[x];
379 if (x == SFQ_EMPTY_SLOT) {
380 x = q->dep[0].next;
381 q->ht[hash] = x;
382 slot = &q->slots[x];
383 slot->hash = hash;
384 }
385
386
387
388
389 if (slot->qlen >= q->limit)
390 return qdisc_drop(skb, sch);
391
392 sch->qstats.backlog += qdisc_pkt_len(skb);
393 slot_queue_add(slot, skb);
394 sfq_inc(q, x);
395 if (slot->qlen == 1) {
396 if (q->tail == NULL) {
397 slot->next = x;
398 } else {
399 slot->next = q->tail->next;
400 q->tail->next = x;
401 }
402 q->tail = slot;
403 slot->allot = q->scaled_quantum;
404 }
405 if (++sch->q.qlen <= q->limit)
406 return NET_XMIT_SUCCESS;
407
408 sfq_drop(sch);
409 return NET_XMIT_CN;
410}
411
412static struct sk_buff *
413sfq_peek(struct Qdisc *sch)
414{
415 struct sfq_sched_data *q = qdisc_priv(sch);
416
417
418 if (q->tail == NULL)
419 return NULL;
420
421 return q->slots[q->tail->next].skblist_next;
422}
423
/*
 * sfq_dequeue - deficit round-robin dequeue
 *
 * Visits the ring starting after q->tail.  A slot whose allot is
 * exhausted becomes the new tail and is replenished with a quantum;
 * otherwise its head packet is dequeued and its allot charged.  A
 * slot emptied by the dequeue is removed from both the hash table and
 * the ring; when it was the last active slot, the ring collapses
 * (q->tail = NULL).
 */
static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	sfq_index a, next_a;
	struct sfq_slot *slot;

	/* no active flows */
	if (q->tail == NULL)
		return NULL;

next_slot:
	a = q->tail->next;
	slot = &q->slots[a];
	if (slot->allot <= 0) {
		/* credit spent: rotate the ring and refill */
		q->tail = slot;
		slot->allot += q->scaled_quantum;
		goto next_slot;
	}
	skb = slot_dequeue_head(slot);
	sfq_dec(q, a);
	qdisc_bstats_update(sch, skb);
	sch->q.qlen--;
	sch->qstats.backlog -= qdisc_pkt_len(skb);

	/* Is the slot empty? */
	if (slot->qlen == 0) {
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		next_a = slot->next;
		if (a == next_a) {
			q->tail = NULL; /* it was the last active slot */
			return skb;
		}
		q->tail->next = next_a;	/* unlink @a from the ring */
	} else {
		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
	}
	return skb;
}
464
465static void
466sfq_reset(struct Qdisc *sch)
467{
468 struct sk_buff *skb;
469
470 while ((skb = sfq_dequeue(sch)) != NULL)
471 kfree_skb(skb);
472}
473
474static void sfq_perturbation(unsigned long arg)
475{
476 struct Qdisc *sch = (struct Qdisc *)arg;
477 struct sfq_sched_data *q = qdisc_priv(sch);
478
479 q->perturbation = net_random();
480
481 if (q->perturb_period)
482 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
483}
484
/*
 * sfq_change - apply a struct tc_sfq_qopt configuration
 *
 * quantum == 0 selects the device MTU; limit is clamped to
 * SFQ_DEPTH - 1.  All updates happen under the tree lock.  If the new
 * limit leaves the qdisc over-long, excess packets are dropped and
 * ancestor qdiscs are informed via qdisc_tree_decrease_qlen().  The
 * perturbation timer is restarted according to the new period.
 */
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tc_sfq_qopt *ctl = nla_data(opt);
	unsigned int qlen;

	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
		return -EINVAL;

	sch_tree_lock(sch);
	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
	q->perturb_period = ctl->perturb_period * HZ;
	if (ctl->limit)
		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);

	qlen = sch->q.qlen;
	while (sch->q.qlen > q->limit)
		sfq_drop(sch);
	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);

	del_timer(&q->perturb_timer);
	if (q->perturb_period) {
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
		q->perturbation = net_random();
	}
	sch_tree_unlock(sch);
	return 0;
}
514
/*
 * sfq_init - initialize qdisc private state
 *
 * Sets up the deferrable perturbation timer, marks every hash bucket
 * empty and every dep[] anchor as a singleton (next == prev == its
 * own offset index), applies either defaults or the user supplied
 * configuration (sfq_change()), and finally threads all slots onto
 * dep[0] — the free list — with empty skb queues.
 */
static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	int i;

	q->perturb_timer.function = sfq_perturbation;
	q->perturb_timer.data = (unsigned long)sch;
	init_timer_deferrable(&q->perturb_timer);

	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
		q->ht[i] = SFQ_EMPTY_SLOT;

	for (i = 0; i < SFQ_DEPTH; i++) {
		/* anchor of an empty list points at itself */
		q->dep[i].next = i + SFQ_SLOTS;
		q->dep[i].prev = i + SFQ_SLOTS;
	}

	q->limit = SFQ_DEPTH - 1;
	q->cur_depth = 0;
	q->tail = NULL;
	if (opt == NULL) {
		q->quantum = psched_mtu(qdisc_dev(sch));
		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
		q->perturb_period = 0;	/* perturbation disabled by default */
		q->perturbation = net_random();
	} else {
		int err = sfq_change(sch, opt);
		if (err)
			return err;
	}

	for (i = 0; i < SFQ_SLOTS; i++) {
		slot_queue_init(&q->slots[i]);
		sfq_link(q, i);	/* qlen == 0: lands on the free list */
	}
	return 0;
}
552
/*
 * sfq_destroy - tear down the qdisc
 *
 * Destroys the filter chain, then stops the perturbation timer.
 * perturb_period is cleared *before* del_timer_sync() so that a
 * concurrently running sfq_perturbation() cannot re-arm the timer.
 */
static void sfq_destroy(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	q->perturb_period = 0;
	del_timer_sync(&q->perturb_timer);
}
561
/*
 * sfq_dump - report the current configuration via netlink
 *
 * The divisor is compile-time fixed in this implementation, and
 * "flows" is reported as the per-flow limit (there is no separate
 * flows knob).  On attribute overflow the partial write is trimmed
 * and -1 returned.
 */
static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_sfq_qopt opt;

	opt.quantum = q->quantum;
	opt.perturb_period = q->perturb_period / HZ;

	opt.limit = q->limit;
	opt.divisor = SFQ_HASH_DIVISOR;
	opt.flows = q->limit;

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);	/* undo the partially written attribute */
	return -1;
}
583
/* SFQ "classes" are internal hash buckets; they carry no inner qdisc. */
static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}
588
/* Buckets are not refcounted objects; nothing to look up or pin. */
static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}
593
/* Filters may bind to us, but there is no per-class state to hand out. */
static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
			      u32 classid)
{
	return 0;
}
599
/* Counterpart of sfq_get()/sfq_bind(): nothing to release. */
static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
603
604static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
605{
606 struct sfq_sched_data *q = qdisc_priv(sch);
607
608 if (cl)
609 return NULL;
610 return &q->filter_list;
611}
612
/* A pseudo class is identified purely by its bucket number, exposed
 * as the minor part of the handle.
 */
static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}
619
/*
 * sfq_dump_class_stats - statistics for one pseudo class
 * @cl: 1-based bucket number (as produced by sfq_walk()/sfq_classify())
 *
 * An empty bucket reports zeroed stats; otherwise the backlog is
 * recomputed by walking the slot's skb list, and the current
 * (unscaled) allot is exported.
 */
static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				struct gnet_dump *d)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index idx = q->ht[cl - 1];
	struct gnet_stats_queue qs = { 0 };
	struct tc_sfq_xstats xstats = { 0 };
	struct sk_buff *skb;

	if (idx != SFQ_EMPTY_SLOT) {
		const struct sfq_slot *slot = &q->slots[idx];

		/* undo the SFQ_ALLOT_SHIFT scaling for userspace */
		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
		qs.qlen = slot->qlen;
		slot_queue_walk(slot, skb)
			qs.backlog += qdisc_pkt_len(skb);
	}
	if (gnet_stats_copy_queue(d, &qs) < 0)
		return -1;
	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
641
/*
 * sfq_walk - iterate over the pseudo classes (hash buckets)
 *
 * Calls arg->fn with 1-based bucket numbers for non-empty buckets.
 * Empty or skipped buckets still advance arg->count so the
 * skip/count bookkeeping stays consistent across partial walks.
 */
static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
		if (q->ht[i] == SFQ_EMPTY_SLOT ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}
663
/* Minimal class interface: just enough for attaching filters and
 * dumping per-bucket statistics; SFQ has no configurable classes.
 */
static const struct Qdisc_class_ops sfq_class_ops = {
	.leaf		=	sfq_leaf,
	.get		=	sfq_get,
	.put		=	sfq_put,
	.tcf_chain	=	sfq_find_tcf,
	.bind_tcf	=	sfq_bind,
	.unbind_tcf	=	sfq_put,
	.dump		=	sfq_dump_class,
	.dump_stats	=	sfq_dump_class_stats,
	.walk		=	sfq_walk,
};
675
static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
	.cl_ops		=	&sfq_class_ops,
	.id		=	"sfq",
	.priv_size	=	sizeof(struct sfq_sched_data),
	.enqueue	=	sfq_enqueue,
	.dequeue	=	sfq_dequeue,
	.peek		=	sfq_peek,
	.drop		=	sfq_drop,
	.init		=	sfq_init,
	.reset		=	sfq_reset,
	.destroy	=	sfq_destroy,
	.change		=	NULL,	/* runtime reconfiguration not wired up */
	.dump		=	sfq_dump,
	.owner		=	THIS_MODULE,
};
691
/* Module boilerplate: register/unregister the "sfq" qdisc. */
static int __init sfq_module_init(void)
{
	return register_qdisc(&sfq_qdisc_ops);
}
static void __exit sfq_module_exit(void)
{
	unregister_qdisc(&sfq_qdisc_ops);
}
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
703