/*
 * net/sched/sch_choke.c	CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows
   but maintains no flow state. The difference from RED is an additional
   step during the enqueuing process. If the average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and the chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because
   it needs to access packets in the queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe - A Stateless Active Queue
   Management Scheme for Approximating Fair Bandwidth Allocation",
   Proceedings of IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004
 */

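/*
 * Illustrative userspace setup (values are hypothetical and the exact
 * syntax may vary by iproute2 version; see tc-choke(8)). CHOKe is
 * configured like RED: tc derives qth_min/qth_max and the
 * Wlog/Plog/Scell_log constants and passes them in TCA_CHOKE_PARMS as a
 * struct tc_red_qopt, e.g.:
 *
 *   tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit ecn
 */
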
#define CHOKE_MAX_QUEUE	(128*1024 - 1)

struct choke_sched_data {
/* Parameters */
	u32		 limit;
	unsigned char	 flags;

	struct red_parms parms;

/* Variables */
	struct red_vars  vars;
	struct tcf_proto __rcu *filter_list;
	struct {
		u32	prob_drop;	/* Early probability drops */
		u32	prob_mark;	/* Early probability marks */
		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
		u32	pdrop;		/* Drops due to queue limits */
		u32	other;		/* Drops due to drop() calls */
		u32	matched;	/* Drops due to flow match */
	} stats;

	unsigned int	 head;
	unsigned int	 tail;

	unsigned int	 tab_mask; /* size - 1 */

	struct sk_buff **tab;
};
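
/*
 * Note on the queue layout (derived from the helpers below): tab[] is a
 * power-of-two ring buffer indexed modulo (tab_mask + 1). head is the
 * oldest occupied slot, tail is one past the newest. Random drops leave
 * NULL "holes" in the ring; the zap helpers move head forward and pull
 * tail back past them.
 */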

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
	return (q->tail - q->head) & q->tab_mask;
}
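/* Example: head = 5, tail = 2, tab_mask = 7 -> (2 - 5) & 7 = 5 slots counted, holes included. */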

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
	do {
		q->head = (q->head + 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
	do {
		q->tail = (q->tail - 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->tail] == NULL);
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
			      struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->tab[idx];

	q->tab[idx] = NULL;

	if (idx == q->head)
		choke_zap_head_holes(q);
	if (idx == q->tail)
		choke_zap_tail_holes(q);

	qdisc_qstats_backlog_dec(sch, skb);
	/* The victim was already accounted by ancestor qdiscs when it was
	 * enqueued and will never reach their dequeue path, so their
	 * qlen/backlog must be reduced explicitly.
	 */
	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
	qdisc_drop(skb, sch, to_free);
	--sch->q.qlen;
}

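/*
 * Per-skb state, stored in the qdisc cb area of the skb: the classid from
 * an attached classifier and a digest of the dissected flow keys are
 * cached here so each packet is classified/dissected at most once while
 * it sits in the queue.
 */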
struct choke_skb_cb {
	u16			classid;
	u8			keys_valid;
	struct flow_keys_digest	keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
	choke_skb_cb(skb)->classid = classid;
}

static u16 choke_get_classid(const struct sk_buff *skb)
{
	return choke_skb_cb(skb)->classid;
}

/*
 * Compare flow of two packets
 *  Returns true only if source and destination address and port match.
 *          false for special cases
 */
static bool choke_match_flow(struct sk_buff *skb1,
			     struct sk_buff *skb2)
{
	struct flow_keys temp;

	if (skb1->protocol != skb2->protocol)
		return false;

	if (!choke_skb_cb(skb1)->keys_valid) {
		choke_skb_cb(skb1)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb1, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
	}

	if (!choke_skb_cb(skb2)->keys_valid) {
		choke_skb_cb(skb2)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb2, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
	}

	return !memcmp(&choke_skb_cb(skb1)->keys,
		       &choke_skb_cb(skb2)->keys,
		       sizeof(choke_skb_cb(skb1)->keys));
}
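
/*
 * Design note: comparing fixed-size digests (the memcmp above) instead of
 * raw headers keeps each match O(1), and the keys_valid flag ensures flow
 * dissection runs at most once per packet even if it is probed repeatedly
 * while queued.
 */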

/*
 * Classify flow using either:
 *  1. pre-existing classification result in skb
 *  2. fast internal classification
 *  3. use TC filter based classification
 */
static bool choke_classify(struct sk_buff *skb,
			   struct Qdisc *sch, int *qerr)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	struct tcf_proto *fl;
	int result;

	fl = rcu_dereference_bh(q->filter_list);
	result = tc_classify(skb, fl, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return false;
		}
#endif
		choke_set_classid(skb, TC_H_MIN(res.classid));
		return true;
	}

	return false;
}

/*
 * Select a packet at random from queue
 * HACK: since queue can have holes from previous deletion; retry several
 *   times to find a random skb but then just give up and return the head
 * Most likely it is only one iteration.
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
					 unsigned int *pidx)
{
	struct sk_buff *skb;
	int retries = 3;

	do {
		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
		skb = q->tab[*pidx];
		if (skb)
			return skb;
	} while (--retries > 0);

	/* All probes hit holes; fall back to the (always valid) head slot. */
	return q->tab[*pidx = q->head];
}

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
			       struct sk_buff *nskb,
			       unsigned int *pidx)
{
	struct sk_buff *oskb;

	if (q->head == q->tail)
		return false;

	oskb = choke_peek_random(q, pidx);
	/* If filters are attached, match on the classifier result instead
	 * of the dissected flow keys.
	 */
	if (rcu_access_pointer(q->filter_list))
		return choke_get_classid(nskb) == choke_get_classid(oskb);

	return choke_match_flow(oskb, nskb);
}

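/*
 * Enqueue is RED with one extra step: once the average queue exceeds
 * qth_min, the arriving packet is compared against one drawn at random
 * from the queue, and both are dropped on a flow match; otherwise the
 * usual RED mark/drop logic applies.
 */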
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	struct choke_sched_data *q = qdisc_priv(sch);
	const struct red_parms *p = &q->parms;

	if (rcu_access_pointer(q->filter_list)) {
		/* If using external classifiers, get result and record it. */
		if (!choke_classify(skb, sch, &ret))
			goto other_drop;	/* Packet was eaten by filter */
	}

	choke_skb_cb(skb)->keys_valid = 0;

	/* Compute average queue usage (see RED) */
	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	/* Is queue small? */
	if (q->vars.qavg <= p->qth_min)
		q->vars.qcount = -1;
	else {
		unsigned int idx;

		/* Draw a packet at random from queue and compare flow */
		if (choke_match_random(q, skb, &idx)) {
			q->stats.matched++;
			choke_drop_by_idx(sch, idx, to_free);
			goto congestion_drop;
		}

		/* Queue is large, always mark/drop */
		if (q->vars.qavg > p->qth_max) {
			q->vars.qcount = -1;

			qdisc_qstats_overlimit(sch);
			if (use_harddrop(q) || !use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
		} else if (++q->vars.qcount) {
			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
				q->vars.qcount = 0;
				q->vars.qR = red_random(p);

				qdisc_qstats_overlimit(sch);
				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
					q->stats.prob_drop++;
					goto congestion_drop;
				}

				q->stats.prob_mark++;
			}
		} else
			q->vars.qR = red_random(p);
	}

	/* Admit new packet */
	if (sch->q.qlen < q->limit) {
		q->tab[q->tail] = skb;
		q->tail = (q->tail + 1) & q->tab_mask;
		++sch->q.qlen;
		qdisc_qstats_backlog_inc(sch, skb);
		return NET_XMIT_SUCCESS;
	}

	q->stats.pdrop++;
	return qdisc_drop(skb, sch, to_free);

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;

other_drop:
	if (ret & __NET_XMIT_BYPASS)
		qdisc_qstats_drop(sch);
	__qdisc_drop(skb, to_free);
	return ret;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (q->head == q->tail) {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
		return NULL;
	}

	skb = q->tab[q->head];
	q->tab[q->head] = NULL;
	choke_zap_head_holes(q);
	--sch->q.qlen;
	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_bstats_update(sch, skb);

	return skb;
}

static void choke_reset(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	while (q->head != q->tail) {
		struct sk_buff *skb = q->tab[q->head];

		q->head = (q->head + 1) & q->tab_mask;
		if (!skb)
			continue;
		rtnl_qdisc_drop(skb, sch);
	}

	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	/* q->tab may still be NULL if init failed before allocation */
	if (q->tab)
		memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
	q->head = q->tail = 0;
	red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
	kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CHOKE_MAX + 1];
	const struct tc_red_qopt *ctl;
	int err;
	struct sk_buff **old = NULL;
	unsigned int mask;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CHOKE_PARMS] == NULL ||
	    tb[TCA_CHOKE_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_CHOKE_PARMS]);

	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;

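	/* Round the table up to a power of two so ring indexing can use a
	 * simple mask, e.g. limit 1000 -> 1024 slots, mask 0x3ff.
	 */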
	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
	if (mask != q->tab_mask) {
		struct sk_buff **ntab;

		ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
			       GFP_KERNEL | __GFP_NOWARN);
		if (!ntab)
			ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
		if (!ntab)
			return -ENOMEM;

		sch_tree_lock(sch);
		old = q->tab;
		if (old) {
			unsigned int oqlen = sch->q.qlen, tail = 0;
			unsigned int dropped = 0;

			while (q->head != q->tail) {
				struct sk_buff *skb = q->tab[q->head];

				q->head = (q->head + 1) & q->tab_mask;
				if (!skb)
					continue;
				if (tail < mask) {
					ntab[tail++] = skb;
					continue;
				}
				dropped += qdisc_pkt_len(skb);
				qdisc_qstats_backlog_dec(sch, skb);
				--sch->q.qlen;
				rtnl_qdisc_drop(skb, sch);
			}
			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
			q->head = 0;
			q->tail = tail;
		}

		q->tab_mask = mask;
		q->tab = ntab;
	} else
		sch_tree_lock(sch);

	q->flags = ctl->flags;
	q->limit = ctl->limit;

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_CHOKE_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	if (q->head == q->tail)
		red_end_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	choke_free(old);
	return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
	return choke_change(sch, opt);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		/* parms store the thresholds scaled by 2^Wlog; undo that */
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tc_choke_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.matched = q->stats.matched,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	tcf_destroy_chain(&q->filter_list);
	choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		=	"choke",
	.priv_size	=	sizeof(struct choke_sched_data),

	.enqueue	=	choke_enqueue,
	.dequeue	=	choke_dequeue,
	.peek		=	choke_peek_head,
	.init		=	choke_init,
	.destroy	=	choke_destroy,
	.reset		=	choke_reset,
	.change		=	choke_change,
	.dump		=	choke_dump,
	.dump_stats	=	choke_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init choke_module_init(void)
{
	return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");