1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
17#include <linux/vmalloc.h>
18#include <net/pkt_sched.h>
19#include <net/inet_ecn.h>
20#include <net/red.h>
21#include <net/flow_dissector.h>
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49#define CHOKE_MAX_QUEUE (128*1024 - 1)
50
51struct choke_sched_data {
52
53 u32 limit;
54 unsigned char flags;
55
56 struct red_parms parms;
57
58
59 struct red_vars vars;
60 struct tcf_proto __rcu *filter_list;
61 struct {
62 u32 prob_drop;
63 u32 prob_mark;
64 u32 forced_drop;
65 u32 forced_mark;
66 u32 pdrop;
67 u32 other;
68 u32 matched;
69 } stats;
70
71 unsigned int head;
72 unsigned int tail;
73
74 unsigned int tab_mask;
75
76 struct sk_buff **tab;
77};
78
79
80static unsigned int choke_len(const struct choke_sched_data *q)
81{
82 return (q->tail - q->head) & q->tab_mask;
83}
84
85
86static int use_ecn(const struct choke_sched_data *q)
87{
88 return q->flags & TC_RED_ECN;
89}
90
91
92static int use_harddrop(const struct choke_sched_data *q)
93{
94 return q->flags & TC_RED_HARDDROP;
95}
96
97
98static void choke_zap_head_holes(struct choke_sched_data *q)
99{
100 do {
101 q->head = (q->head + 1) & q->tab_mask;
102 if (q->head == q->tail)
103 break;
104 } while (q->tab[q->head] == NULL);
105}
106
107
108static void choke_zap_tail_holes(struct choke_sched_data *q)
109{
110 do {
111 q->tail = (q->tail - 1) & q->tab_mask;
112 if (q->head == q->tail)
113 break;
114 } while (q->tab[q->tail] == NULL);
115}
116
117
118static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
119{
120 struct choke_sched_data *q = qdisc_priv(sch);
121 struct sk_buff *skb = q->tab[idx];
122
123 q->tab[idx] = NULL;
124
125 if (idx == q->head)
126 choke_zap_head_holes(q);
127 if (idx == q->tail)
128 choke_zap_tail_holes(q);
129
130 qdisc_qstats_backlog_dec(sch, skb);
131 qdisc_drop(skb, sch);
132 qdisc_tree_decrease_qlen(sch, 1);
133 --sch->q.qlen;
134}
135
136struct choke_skb_cb {
137 u16 classid;
138 u8 keys_valid;
139 struct flow_keys_digest keys;
140};
141
142static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
143{
144 qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
145 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
146}
147
148static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
149{
150 choke_skb_cb(skb)->classid = classid;
151}
152
153static u16 choke_get_classid(const struct sk_buff *skb)
154{
155 return choke_skb_cb(skb)->classid;
156}
157
158
159
160
161
162
163static bool choke_match_flow(struct sk_buff *skb1,
164 struct sk_buff *skb2)
165{
166 struct flow_keys temp;
167
168 if (skb1->protocol != skb2->protocol)
169 return false;
170
171 if (!choke_skb_cb(skb1)->keys_valid) {
172 choke_skb_cb(skb1)->keys_valid = 1;
173 skb_flow_dissect_flow_keys(skb1, &temp, 0);
174 make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
175 }
176
177 if (!choke_skb_cb(skb2)->keys_valid) {
178 choke_skb_cb(skb2)->keys_valid = 1;
179 skb_flow_dissect_flow_keys(skb2, &temp, 0);
180 make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
181 }
182
183 return !memcmp(&choke_skb_cb(skb1)->keys,
184 &choke_skb_cb(skb2)->keys,
185 sizeof(choke_skb_cb(skb1)->keys));
186}
187
188
189
190
191
192
193
194static bool choke_classify(struct sk_buff *skb,
195 struct Qdisc *sch, int *qerr)
196
197{
198 struct choke_sched_data *q = qdisc_priv(sch);
199 struct tcf_result res;
200 struct tcf_proto *fl;
201 int result;
202
203 fl = rcu_dereference_bh(q->filter_list);
204 result = tc_classify(skb, fl, &res, false);
205 if (result >= 0) {
206#ifdef CONFIG_NET_CLS_ACT
207 switch (result) {
208 case TC_ACT_STOLEN:
209 case TC_ACT_QUEUED:
210 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
211 case TC_ACT_SHOT:
212 return false;
213 }
214#endif
215 choke_set_classid(skb, TC_H_MIN(res.classid));
216 return true;
217 }
218
219 return false;
220}
221
222
223
224
225
226
227
228static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
229 unsigned int *pidx)
230{
231 struct sk_buff *skb;
232 int retrys = 3;
233
234 do {
235 *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
236 skb = q->tab[*pidx];
237 if (skb)
238 return skb;
239 } while (--retrys > 0);
240
241 return q->tab[*pidx = q->head];
242}
243
244
245
246
247
248static bool choke_match_random(const struct choke_sched_data *q,
249 struct sk_buff *nskb,
250 unsigned int *pidx)
251{
252 struct sk_buff *oskb;
253
254 if (q->head == q->tail)
255 return false;
256
257 oskb = choke_peek_random(q, pidx);
258 if (rcu_access_pointer(q->filter_list))
259 return choke_get_classid(nskb) == choke_get_classid(oskb);
260
261 return choke_match_flow(oskb, nskb);
262}
263
264static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
265{
266 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
267 struct choke_sched_data *q = qdisc_priv(sch);
268 const struct red_parms *p = &q->parms;
269
270 if (rcu_access_pointer(q->filter_list)) {
271
272 if (!choke_classify(skb, sch, &ret))
273 goto other_drop;
274 }
275
276 choke_skb_cb(skb)->keys_valid = 0;
277
278 q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
279 if (red_is_idling(&q->vars))
280 red_end_of_idle_period(&q->vars);
281
282
283 if (q->vars.qavg <= p->qth_min)
284 q->vars.qcount = -1;
285 else {
286 unsigned int idx;
287
288
289 if (choke_match_random(q, skb, &idx)) {
290 q->stats.matched++;
291 choke_drop_by_idx(sch, idx);
292 goto congestion_drop;
293 }
294
295
296 if (q->vars.qavg > p->qth_max) {
297 q->vars.qcount = -1;
298
299 qdisc_qstats_overlimit(sch);
300 if (use_harddrop(q) || !use_ecn(q) ||
301 !INET_ECN_set_ce(skb)) {
302 q->stats.forced_drop++;
303 goto congestion_drop;
304 }
305
306 q->stats.forced_mark++;
307 } else if (++q->vars.qcount) {
308 if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
309 q->vars.qcount = 0;
310 q->vars.qR = red_random(p);
311
312 qdisc_qstats_overlimit(sch);
313 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
314 q->stats.prob_drop++;
315 goto congestion_drop;
316 }
317
318 q->stats.prob_mark++;
319 }
320 } else
321 q->vars.qR = red_random(p);
322 }
323
324
325 if (sch->q.qlen < q->limit) {
326 q->tab[q->tail] = skb;
327 q->tail = (q->tail + 1) & q->tab_mask;
328 ++sch->q.qlen;
329 qdisc_qstats_backlog_inc(sch, skb);
330 return NET_XMIT_SUCCESS;
331 }
332
333 q->stats.pdrop++;
334 return qdisc_drop(skb, sch);
335
336congestion_drop:
337 qdisc_drop(skb, sch);
338 return NET_XMIT_CN;
339
340other_drop:
341 if (ret & __NET_XMIT_BYPASS)
342 qdisc_qstats_drop(sch);
343 kfree_skb(skb);
344 return ret;
345}
346
347static struct sk_buff *choke_dequeue(struct Qdisc *sch)
348{
349 struct choke_sched_data *q = qdisc_priv(sch);
350 struct sk_buff *skb;
351
352 if (q->head == q->tail) {
353 if (!red_is_idling(&q->vars))
354 red_start_of_idle_period(&q->vars);
355 return NULL;
356 }
357
358 skb = q->tab[q->head];
359 q->tab[q->head] = NULL;
360 choke_zap_head_holes(q);
361 --sch->q.qlen;
362 qdisc_qstats_backlog_dec(sch, skb);
363 qdisc_bstats_update(sch, skb);
364
365 return skb;
366}
367
368static unsigned int choke_drop(struct Qdisc *sch)
369{
370 struct choke_sched_data *q = qdisc_priv(sch);
371 unsigned int len;
372
373 len = qdisc_queue_drop(sch);
374 if (len > 0)
375 q->stats.other++;
376 else {
377 if (!red_is_idling(&q->vars))
378 red_start_of_idle_period(&q->vars);
379 }
380
381 return len;
382}
383
384static void choke_reset(struct Qdisc *sch)
385{
386 struct choke_sched_data *q = qdisc_priv(sch);
387
388 while (q->head != q->tail) {
389 struct sk_buff *skb = q->tab[q->head];
390
391 q->head = (q->head + 1) & q->tab_mask;
392 if (!skb)
393 continue;
394 qdisc_qstats_backlog_dec(sch, skb);
395 --sch->q.qlen;
396 qdisc_drop(skb, sch);
397 }
398
399 memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
400 q->head = q->tail = 0;
401 red_restart(&q->vars);
402}
403
404static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
405 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
406 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
407 [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
408};
409
410
411static void choke_free(void *addr)
412{
413 kvfree(addr);
414}
415
416static int choke_change(struct Qdisc *sch, struct nlattr *opt)
417{
418 struct choke_sched_data *q = qdisc_priv(sch);
419 struct nlattr *tb[TCA_CHOKE_MAX + 1];
420 const struct tc_red_qopt *ctl;
421 int err;
422 struct sk_buff **old = NULL;
423 unsigned int mask;
424 u32 max_P;
425
426 if (opt == NULL)
427 return -EINVAL;
428
429 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
430 if (err < 0)
431 return err;
432
433 if (tb[TCA_CHOKE_PARMS] == NULL ||
434 tb[TCA_CHOKE_STAB] == NULL)
435 return -EINVAL;
436
437 max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
438
439 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
440
441 if (ctl->limit > CHOKE_MAX_QUEUE)
442 return -EINVAL;
443
444 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
445 if (mask != q->tab_mask) {
446 struct sk_buff **ntab;
447
448 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
449 GFP_KERNEL | __GFP_NOWARN);
450 if (!ntab)
451 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
452 if (!ntab)
453 return -ENOMEM;
454
455 sch_tree_lock(sch);
456 old = q->tab;
457 if (old) {
458 unsigned int oqlen = sch->q.qlen, tail = 0;
459
460 while (q->head != q->tail) {
461 struct sk_buff *skb = q->tab[q->head];
462
463 q->head = (q->head + 1) & q->tab_mask;
464 if (!skb)
465 continue;
466 if (tail < mask) {
467 ntab[tail++] = skb;
468 continue;
469 }
470 qdisc_qstats_backlog_dec(sch, skb);
471 --sch->q.qlen;
472 qdisc_drop(skb, sch);
473 }
474 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
475 q->head = 0;
476 q->tail = tail;
477 }
478
479 q->tab_mask = mask;
480 q->tab = ntab;
481 } else
482 sch_tree_lock(sch);
483
484 q->flags = ctl->flags;
485 q->limit = ctl->limit;
486
487 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
488 ctl->Plog, ctl->Scell_log,
489 nla_data(tb[TCA_CHOKE_STAB]),
490 max_P);
491 red_set_vars(&q->vars);
492
493 if (q->head == q->tail)
494 red_end_of_idle_period(&q->vars);
495
496 sch_tree_unlock(sch);
497 choke_free(old);
498 return 0;
499}
500
501static int choke_init(struct Qdisc *sch, struct nlattr *opt)
502{
503 return choke_change(sch, opt);
504}
505
506static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
507{
508 struct choke_sched_data *q = qdisc_priv(sch);
509 struct nlattr *opts = NULL;
510 struct tc_red_qopt opt = {
511 .limit = q->limit,
512 .flags = q->flags,
513 .qth_min = q->parms.qth_min >> q->parms.Wlog,
514 .qth_max = q->parms.qth_max >> q->parms.Wlog,
515 .Wlog = q->parms.Wlog,
516 .Plog = q->parms.Plog,
517 .Scell_log = q->parms.Scell_log,
518 };
519
520 opts = nla_nest_start(skb, TCA_OPTIONS);
521 if (opts == NULL)
522 goto nla_put_failure;
523
524 if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
525 nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
526 goto nla_put_failure;
527 return nla_nest_end(skb, opts);
528
529nla_put_failure:
530 nla_nest_cancel(skb, opts);
531 return -EMSGSIZE;
532}
533
534static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
535{
536 struct choke_sched_data *q = qdisc_priv(sch);
537 struct tc_choke_xstats st = {
538 .early = q->stats.prob_drop + q->stats.forced_drop,
539 .marked = q->stats.prob_mark + q->stats.forced_mark,
540 .pdrop = q->stats.pdrop,
541 .other = q->stats.other,
542 .matched = q->stats.matched,
543 };
544
545 return gnet_stats_copy_app(d, &st, sizeof(st));
546}
547
548static void choke_destroy(struct Qdisc *sch)
549{
550 struct choke_sched_data *q = qdisc_priv(sch);
551
552 tcf_destroy_chain(&q->filter_list);
553 choke_free(q->tab);
554}
555
556static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
557{
558 return NULL;
559}
560
561static unsigned long choke_get(struct Qdisc *sch, u32 classid)
562{
563 return 0;
564}
565
566static void choke_put(struct Qdisc *q, unsigned long cl)
567{
568}
569
570static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
571 u32 classid)
572{
573 return 0;
574}
575
576static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch,
577 unsigned long cl)
578{
579 struct choke_sched_data *q = qdisc_priv(sch);
580
581 if (cl)
582 return NULL;
583 return &q->filter_list;
584}
585
586static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
587 struct sk_buff *skb, struct tcmsg *tcm)
588{
589 tcm->tcm_handle |= TC_H_MIN(cl);
590 return 0;
591}
592
593static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
594{
595 if (!arg->stop) {
596 if (arg->fn(sch, 1, arg) < 0) {
597 arg->stop = 1;
598 return;
599 }
600 arg->count++;
601 }
602}
603
604static const struct Qdisc_class_ops choke_class_ops = {
605 .leaf = choke_leaf,
606 .get = choke_get,
607 .put = choke_put,
608 .tcf_chain = choke_find_tcf,
609 .bind_tcf = choke_bind,
610 .unbind_tcf = choke_put,
611 .dump = choke_dump_class,
612 .walk = choke_walk,
613};
614
615static struct sk_buff *choke_peek_head(struct Qdisc *sch)
616{
617 struct choke_sched_data *q = qdisc_priv(sch);
618
619 return (q->head != q->tail) ? q->tab[q->head] : NULL;
620}
621
622static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
623 .id = "choke",
624 .priv_size = sizeof(struct choke_sched_data),
625
626 .enqueue = choke_enqueue,
627 .dequeue = choke_dequeue,
628 .peek = choke_peek_head,
629 .drop = choke_drop,
630 .init = choke_init,
631 .destroy = choke_destroy,
632 .reset = choke_reset,
633 .change = choke_change,
634 .dump = choke_dump,
635 .dump_stats = choke_dump_stats,
636 .owner = THIS_MODULE,
637};
638
639static int __init choke_module_init(void)
640{
641 return register_qdisc(&choke_qdisc_ops);
642}
643
644static void __exit choke_module_exit(void)
645{
646 unregister_qdisc(&choke_qdisc_ops);
647}
648
649module_init(choke_module_init)
650module_exit(choke_module_exit)
651
652MODULE_LICENSE("GPL");
653