/*
 * net/sched/sch_choke.c	CHOKE scheduler
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
 * CHOKe stateless AQM for fair bandwidth allocation
 * =================================================
 *
 * CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
 * unresponsive flows) is a variant of RED that penalizes misbehaving
 * flows while keeping no per-flow state.  The difference from RED is one
 * extra step at enqueue time: if the average queue size is above the
 * minimum threshold, a packet is drawn at random from the queue and, if
 * it belongs to the same flow as the arriving packet, both are dropped.
 *
 * Reference: R. Pan, B. Prabhakar and K. Psounis, "CHOKe - A Stateless
 * Active Queue Management Scheme for Approximating Fair Bandwidth
 * Allocation", IEEE INFOCOM, 2000.
 */

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE	(128*1024 - 1)
struct choke_sched_data {
/* Parameters */
	u32		 limit;
	unsigned char	 flags;

	struct red_parms parms;

/* Variables */
	struct red_vars  vars;
	struct {
		u32	prob_drop;	/* Early probability drops */
		u32	prob_mark;	/* Early probability marks */
		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
		u32	pdrop;		/* Drops due to queue limit */
		u32	other;		/* Other drops */
		u32	matched;	/* Drops due to flow match */
	} stats;

	unsigned int	 head;
	unsigned int	 tail;

	unsigned int	 tab_mask;	/* size - 1 */

	struct sk_buff **tab;
};

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
	return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN marking enabled for this qdisc? */
static int use_ecn(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

/* Should packets over the max threshold always be dropped (not marked)? */
static int use_harddrop(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
	do {
		q->head = (q->head + 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
	do {
		q->tail = (q->tail - 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->tail] == NULL);
}

/* Drop a packet from the queue array by creating a "hole" at its slot */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
			      struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->tab[idx];

	q->tab[idx] = NULL;

	if (idx == q->head)
		choke_zap_head_holes(q);
	if (idx == q->tail)
		choke_zap_tail_holes(q);

	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
	qdisc_drop(skb, sch, to_free);
	--sch->q.qlen;
}

struct choke_skb_cb {
	u16			classid;
	u8			keys_valid;
	struct flow_keys_digest	keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
	choke_skb_cb(skb)->classid = classid;
}

/*
 * Compare the flows of two packets.  Flow keys are dissected lazily and a
 * digest is cached in the skb control block, so each packet is dissected
 * at most once.
 */
static bool choke_match_flow(struct sk_buff *skb1,
			     struct sk_buff *skb2)
{
	struct flow_keys temp;

	if (skb1->protocol != skb2->protocol)
		return false;

	if (!choke_skb_cb(skb1)->keys_valid) {
		choke_skb_cb(skb1)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb1, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
	}

	if (!choke_skb_cb(skb2)->keys_valid) {
		choke_skb_cb(skb2)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb2, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
	}

	return !memcmp(&choke_skb_cb(skb1)->keys,
		       &choke_skb_cb(skb2)->keys,
		       sizeof(choke_skb_cb(skb1)->keys));
}

/*
 * Select a packet at random from the queue.  The table may contain holes
 * from previous deletions, so retry a few times to find an skb and then
 * fall back to the head slot.
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
					 unsigned int *pidx)
{
	struct sk_buff *skb;
	int retrys = 3;

	do {
		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
		skb = q->tab[*pidx];
		if (skb)
			return skb;
	} while (--retrys > 0);

	return q->tab[*pidx = q->head];
}

/*
 * Compare a new packet against one drawn at random from the queue.
 * Returns true if the flows match and stores the chosen index in *pidx.
 */
static bool choke_match_random(const struct choke_sched_data *q,
			       struct sk_buff *nskb,
			       unsigned int *pidx)
{
	struct sk_buff *oskb;

	if (q->head == q->tail)
		return false;

	oskb = choke_peek_random(q, pidx);
	return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	const struct red_parms *p = &q->parms;

	choke_skb_cb(skb)->keys_valid = 0;

	/* Compute average queue usage (see RED) */
	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	/* Is queue small? */
	if (q->vars.qavg <= p->qth_min)
		q->vars.qcount = -1;
	else {
		unsigned int idx;

		/* Draw a packet at random from queue and compare flow */
		if (choke_match_random(q, skb, &idx)) {
			q->stats.matched++;
			choke_drop_by_idx(sch, idx, to_free);
			goto congestion_drop;
		}

		/* Queue is large, always mark/drop */
		if (q->vars.qavg > p->qth_max) {
			q->vars.qcount = -1;

			qdisc_qstats_overlimit(sch);
			if (use_harddrop(q) || !use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
		} else if (++q->vars.qcount) {
			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
				q->vars.qcount = 0;
				q->vars.qR = red_random(p);

				qdisc_qstats_overlimit(sch);
				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
					q->stats.prob_drop++;
					goto congestion_drop;
				}

				q->stats.prob_mark++;
			}
		} else
			q->vars.qR = red_random(p);
	}

	/* Admit new packet */
	if (sch->q.qlen < q->limit) {
		q->tab[q->tail] = skb;
		q->tail = (q->tail + 1) & q->tab_mask;
		++sch->q.qlen;
		qdisc_qstats_backlog_inc(sch, skb);
		return NET_XMIT_SUCCESS;
	}

	q->stats.pdrop++;
	return qdisc_drop(skb, sch, to_free);

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (q->head == q->tail) {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
		return NULL;
	}

	skb = q->tab[q->head];
	q->tab[q->head] = NULL;
	choke_zap_head_holes(q);
	--sch->q.qlen;
	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_bstats_update(sch, skb);

	return skb;
}

static void choke_reset(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	while (q->head != q->tail) {
		struct sk_buff *skb = q->tab[q->head];

		q->head = (q->head + 1) & q->tab_mask;
		if (!skb)
			continue;
		rtnl_qdisc_drop(skb, sch);
	}

	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
	q->head = q->tail = 0;
	red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
	kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CHOKE_MAX + 1];
	const struct tc_red_qopt *ctl;
	int err;
	struct sk_buff **old = NULL;
	unsigned int mask;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
	if (err < 0)
		return err;

	if (tb[TCA_CHOKE_PARMS] == NULL ||
	    tb[TCA_CHOKE_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_CHOKE_PARMS]);

	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;

	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
	if (mask != q->tab_mask) {
		struct sk_buff **ntab;

		ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *),
				      GFP_KERNEL | __GFP_ZERO);
		if (!ntab)
			return -ENOMEM;

		sch_tree_lock(sch);
		old = q->tab;
		if (old) {
			unsigned int oqlen = sch->q.qlen, tail = 0;
			unsigned dropped = 0;

			while (q->head != q->tail) {
				struct sk_buff *skb = q->tab[q->head];

				q->head = (q->head + 1) & q->tab_mask;
				if (!skb)
					continue;
				if (tail < mask) {
					ntab[tail++] = skb;
					continue;
				}
				dropped += qdisc_pkt_len(skb);
				qdisc_qstats_backlog_dec(sch, skb);
				--sch->q.qlen;
				rtnl_qdisc_drop(skb, sch);
			}
			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
			q->head = 0;
			q->tail = tail;
		}

		q->tab_mask = mask;
		q->tab = ntab;
	} else
		sch_tree_lock(sch);

	q->flags = ctl->flags;
	q->limit = ctl->limit;

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_CHOKE_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	if (q->head == q->tail)
		red_end_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	choke_free(old);
	return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
	return choke_change(sch, opt);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tc_choke_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.matched = q->stats.matched,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		= "choke",
	.priv_size	= sizeof(struct choke_sched_data),

	.enqueue	= choke_enqueue,
	.dequeue	= choke_dequeue,
	.peek		= choke_peek_head,
	.init		= choke_init,
	.destroy	= choke_destroy,
	.reset		= choke_reset,
	.change		= choke_change,
	.dump		= choke_dump,
	.dump_stats	= choke_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init choke_module_init(void)
{
	return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");
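
/*
 * Illustrative usage sketch (not part of the module): once registered, the
 * "choke" qdisc is configured from userspace much like RED.  The commands
 * below are an assumed example only; the device name and exact parameter
 * syntax depend on the installed iproute2 version (see tc-choke(8)).
 *
 *   # modprobe sch_choke
 *   # tc qdisc add dev eth0 root choke limit 1000 min 100 max 300 avpkt 1000
 *   # tc -s qdisc show dev eth0
 */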