#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_keys.h>
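
/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes flows taking more
   than their fair share of the queue, without keeping per-flow state.
   The difference from RED is one extra step at enqueue time: when the
   average queue size is above the minimum threshold, a packet is drawn at
   random from the queue and compared with the arriving packet; if both
   belong to the same flow, both are dropped.

   Source:
   R. Pan, B. Prabhakar and K. Psounis, "CHOKe - A Stateless Active Queue
   Management Scheme for Approximating Fair Bandwidth Allocation",
   Proc. IEEE INFOCOM, 2000.
 */

/* Upper bound on size of the packet table (packets) */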
#define CHOKE_MAX_QUEUE	(128*1024 - 1)

struct choke_sched_data {
/* Parameters */
	u32		 limit;
	unsigned char	 flags;

	struct red_parms parms;

/* Variables */
	struct red_vars	 vars;
	struct tcf_proto *filter_list;
	struct {
		u32	prob_drop;	/* Early probability drops */
		u32	prob_mark;	/* Early probability marks */
		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
		u32	pdrop;		/* Drops due to queue limit */
		u32	other;		/* Drops due to drop() calls */
		u32	matched;	/* Drops due to flow match */
	} stats;

	unsigned int	 head;
	unsigned int	 tail;

	unsigned int	 tab_mask;	/* size of tab[] - 1 */

	struct sk_buff **tab;
};
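
/* number of elements in queue including holes */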
static unsigned int choke_len(const struct choke_sched_data *q)
{
	return (q->tail - q->head) & q->tab_mask;
}
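
/* Is ECN marking (TC_RED_ECN) enabled? */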
static int use_ecn(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}
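
/* Should packets over the max threshold just be dropped (versus marked)? */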
static int use_harddrop(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}
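
/* Move head pointer forward to skip over holes left by removed packets */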
static void choke_zap_head_holes(struct choke_sched_data *q)
{
	do {
		q->head = (q->head + 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->head] == NULL);
}
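
/* Move tail pointer backwards to reuse holes */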
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
	do {
		q->tail = (q->tail - 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->tail] == NULL);
}
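
/* Drop a packet from the queue array by creating a "hole" at its slot */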
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->tab[idx];

	q->tab[idx] = NULL;

	if (idx == q->head)
		choke_zap_head_holes(q);
	if (idx == q->tail)
		choke_zap_tail_holes(q);

	sch->qstats.backlog -= qdisc_pkt_len(skb);
	qdisc_drop(skb, sch);
	qdisc_tree_decrease_qlen(sch, 1);
	--sch->q.qlen;
}

/* Per-packet control block: classifier result and cached flow keys */
struct choke_skb_cb {
	u16			classid;
	u8			keys_valid;
	struct flow_keys	keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
	choke_skb_cb(skb)->classid = classid;
}

static u16 choke_get_classid(const struct sk_buff *skb)
{
	return choke_skb_cb(skb)->classid;
}
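
/*
 * Compare flows of two packets.
 * Dissects each packet at most once (caching the result in the skb cb) and
 * returns true only if protocol and all dissected flow keys match.
 */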
static bool choke_match_flow(struct sk_buff *skb1,
			     struct sk_buff *skb2)
{
	if (skb1->protocol != skb2->protocol)
		return false;

	if (!choke_skb_cb(skb1)->keys_valid) {
		choke_skb_cb(skb1)->keys_valid = 1;
		skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
	}

	if (!choke_skb_cb(skb2)->keys_valid) {
		choke_skb_cb(skb2)->keys_valid = 1;
		skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
	}

	return !memcmp(&choke_skb_cb(skb1)->keys,
		       &choke_skb_cb(skb2)->keys,
		       sizeof(struct flow_keys));
}
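
/*
 * Classify the packet with the attached TC filters and record the class id
 * in the skb control block.  Returns false if the packet was stolen, queued
 * or shot by a filter action, or if no filter matched.
 */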
static bool choke_classify(struct sk_buff *skb,
			   struct Qdisc *sch, int *qerr)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	int result;

	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return false;
		}
#endif
		choke_set_classid(skb, TC_H_MIN(res.classid));
		return true;
	}

	return false;
}
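
/*
 * Select a packet at random from the queue and return it (setting *pidx).
 * Since the table can contain holes from earlier removals, retry a few
 * times, then fall back to the packet at the head, which is always valid
 * when the queue is not empty.
 */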
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
					 unsigned int *pidx)
{
	struct sk_buff *skb;
	int retrys = 3;

	do {
		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
		skb = q->tab[*pidx];
		if (skb)
			return skb;
	} while (--retrys > 0);

	return q->tab[*pidx = q->head];
}
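
/*
 * Compare the new packet against a randomly chosen packet in the queue.
 * Uses the filter classid when filters are attached, otherwise the
 * dissected flow keys.  Returns true on a match and sets *pidx.
 */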
static bool choke_match_random(const struct choke_sched_data *q,
			       struct sk_buff *nskb,
			       unsigned int *pidx)
{
	struct sk_buff *oskb;

	if (q->head == q->tail)
		return false;

	oskb = choke_peek_random(q, pidx);
	if (q->filter_list)
		return choke_get_classid(nskb) == choke_get_classid(oskb);

	return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	const struct red_parms *p = &q->parms;
	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;

	if (q->filter_list) {
		/* If using external classifiers, get result and record it. */
		if (!choke_classify(skb, sch, &ret))
			goto other_drop;	/* Packet was eaten by filter */
	}

	choke_skb_cb(skb)->keys_valid = 0;
	/* Compute average queue usage (see RED) */
	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	/* Is queue small? */
	if (q->vars.qavg <= p->qth_min)
		q->vars.qcount = -1;
	else {
		unsigned int idx;

		/* Draw a packet at random from queue and compare flow */
		if (choke_match_random(q, skb, &idx)) {
			q->stats.matched++;
			choke_drop_by_idx(sch, idx);
			goto congestion_drop;
		}

		/* Queue is large, always mark/drop */
		if (q->vars.qavg > p->qth_max) {
			q->vars.qcount = -1;

			sch->qstats.overlimits++;
			if (use_harddrop(q) || !use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
		} else if (++q->vars.qcount) {
			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
				q->vars.qcount = 0;
				q->vars.qR = red_random(p);

				sch->qstats.overlimits++;
				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
					q->stats.prob_drop++;
					goto congestion_drop;
				}

				q->stats.prob_mark++;
			}
		} else
			q->vars.qR = red_random(p);
	}

	/* Admit new packet */
	if (sch->q.qlen < q->limit) {
		q->tab[q->tail] = skb;
		q->tail = (q->tail + 1) & q->tab_mask;
		++sch->q.qlen;
		sch->qstats.backlog += qdisc_pkt_len(skb);
		return NET_XMIT_SUCCESS;
	}

	q->stats.pdrop++;
	return qdisc_drop(skb, sch);

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;

other_drop:
	if (ret & __NET_XMIT_BYPASS)
		sch->qstats.drops++;
	kfree_skb(skb);
	return ret;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (q->head == q->tail) {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
		return NULL;
	}

	skb = q->tab[q->head];
	q->tab[q->head] = NULL;
	choke_zap_head_holes(q);
	--sch->q.qlen;
	sch->qstats.backlog -= qdisc_pkt_len(skb);
	qdisc_bstats_update(sch, skb);

	return skb;
}

static unsigned int choke_drop(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (len > 0)
		q->stats.other++;
	else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}

	return len;
}

static void choke_reset(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
};
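
/* Free a packet table allocated with kcalloc() or vzalloc() */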
static void choke_free(void *addr)
{
	if (addr) {
		if (is_vmalloc_addr(addr))
			vfree(addr);
		else
			kfree(addr);
	}
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CHOKE_MAX + 1];
	const struct tc_red_qopt *ctl;
	int err;
	struct sk_buff **old = NULL;
	unsigned int mask;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CHOKE_PARMS] == NULL ||
	    tb[TCA_CHOKE_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_CHOKE_PARMS]);

	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;

	/* Table size is the configured limit rounded up to a power of two */
	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
	if (mask != q->tab_mask) {
		struct sk_buff **ntab;

		ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
			       GFP_KERNEL | __GFP_NOWARN);
		if (!ntab)
			ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
		if (!ntab)
			return -ENOMEM;

		sch_tree_lock(sch);
		old = q->tab;
		if (old) {
			unsigned int oqlen = sch->q.qlen, tail = 0;

			/* Repack queued packets into the new table,
			 * dropping whatever does not fit.
			 */
			while (q->head != q->tail) {
				struct sk_buff *skb = q->tab[q->head];

				q->head = (q->head + 1) & q->tab_mask;
				if (!skb)
					continue;
				if (tail < mask) {
					ntab[tail++] = skb;
					continue;
				}
				sch->qstats.backlog -= qdisc_pkt_len(skb);
				--sch->q.qlen;
				qdisc_drop(skb, sch);
			}
			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
			q->head = 0;
			q->tail = tail;
		}

		q->tab_mask = mask;
		q->tab = ntab;
	} else
		sch_tree_lock(sch);

	q->flags = ctl->flags;
	q->limit = ctl->limit;

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_CHOKE_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	if (q->head == q->tail)
		red_end_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	choke_free(old);
	return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
	return choke_change(sch, opt);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tc_choke_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.matched = q->stats.matched,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	choke_free(q->tab);
}
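
/*
 * CHOKe is not really a classful qdisc: packets in the queue must be
 * accessible at random.  The stubs below provide only the minimal class
 * interface needed so that TC filters can override the built-in flow
 * comparison.
 */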
static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}

static unsigned long choke_get(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static void choke_put(struct Qdisc *q, unsigned long cl)
{
}

static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
				u32 classid)
{
	return 0;
}

static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return &q->filter_list;
}

static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}

static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	if (!arg->stop) {
		if (arg->fn(sch, 1, arg) < 0) {
			arg->stop = 1;
			return;
		}
		arg->count++;
	}
}

static const struct Qdisc_class_ops choke_class_ops = {
	.leaf		= choke_leaf,
	.get		= choke_get,
	.put		= choke_put,
	.tcf_chain	= choke_find_tcf,
	.bind_tcf	= choke_bind,
	.unbind_tcf	= choke_put,
	.dump		= choke_dump_class,
	.walk		= choke_walk,
};

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		= "choke",
	.priv_size	= sizeof(struct choke_sched_data),

	.enqueue	= choke_enqueue,
	.dequeue	= choke_dequeue,
	.peek		= choke_peek_head,
	.drop		= choke_drop,
	.init		= choke_init,
	.destroy	= choke_destroy,
	.reset		= choke_reset,
	.change		= choke_change,
	.dump		= choke_dump,
	.dump_stats	= choke_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init choke_module_init(void)
{
	return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");