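// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_choke.c	CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 */
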
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>
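
/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe is a variant of RED that penalizes misbehaving flows when
   the buffer occupancy is greater than qth_min.

   When a packet arrives and the average queue length exceeds the
   minimum threshold, a packet is drawn at random from the FIFO and
   compared with the arriving packet.  If both belong to the same
   flow, both are dropped; otherwise the arriving packet is subjected
   to the usual RED mark/drop decision.  Flows occupying more buffer
   space are hit proportionally more often, which approximates fair
   bandwidth allocation without keeping any per-flow state.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe: A Stateless Active
   Queue Management Scheme for Approximating Fair Bandwidth
   Allocation", Proceedings of IEEE INFOCOM, March 2000.

   Illustrative setup via iproute2 (parameters mirror RED; see
   tc-choke(8) for the authoritative syntax):

     tc qdisc add dev eth0 root choke limit 1000 min 100 max 250 \
             avpkt 1000 burst 150 bandwidth 10mbit probability 0.02 ecn
 */

/* Upper bound on size of sk_buff table (packets) */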
#define CHOKE_MAX_QUEUE	(128*1024 - 1)

struct choke_sched_data {
/* Parameters */
	u32		 limit;
	unsigned char	 flags;

	struct red_parms parms;

/* Variables */
	struct red_vars	 vars;
	struct {
		u32	prob_drop;	/* Early probability drops */
		u32	prob_mark;	/* Early probability marks */
		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
		u32	pdrop;		/* Drops due to queue limits */
		u32	other;		/* Drops due to drop() calls */
		u32	matched;	/* Drops due to flow match */
	} stats;

	unsigned int	 head;
	unsigned int	 tail;

	unsigned int	 tab_mask; /* size - 1 */

	struct sk_buff **tab;
};
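
/* number of elements in queue including holes */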
static unsigned int choke_len(const struct choke_sched_data *q)
{
	return (q->tail - q->head) & q->tab_mask;
}
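
/* Is ECN parameter configured */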
static int use_ecn(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}
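
/* Should packets over max just be dropped (versus marked) */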
static int use_harddrop(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}
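
/* Move head pointer forward to skip over holes */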
static void choke_zap_head_holes(struct choke_sched_data *q)
{
	do {
		q->head = (q->head + 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->head] == NULL);
}
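
/* Move tail pointer backwards to reuse holes */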
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
	do {
		q->tail = (q->tail - 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->tail] == NULL);
}
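
/* Drop packet from queue array by creating a "hole" */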
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
			      struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->tab[idx];

	q->tab[idx] = NULL;

	if (idx == q->head)
		choke_zap_head_holes(q);
	if (idx == q->tail)
		choke_zap_tail_holes(q);

	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
	qdisc_drop(skb, sch, to_free);
	--sch->q.qlen;
}

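/*
 * Per-packet state cached in the skb qdisc control block: the flow
 * keys digest is computed lazily on first comparison (keys_valid),
 * so packets never drawn for a random match pay no dissection cost.
 */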
struct choke_skb_cb {
	u8			keys_valid;
	struct flow_keys_digest	keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}
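
/*
 * Compare flow of two packets
 *  Returns true only if source and destination address and port match.
 *          false for special cases
 */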
static bool choke_match_flow(struct sk_buff *skb1,
			     struct sk_buff *skb2)
{
	struct flow_keys temp;

	if (skb1->protocol != skb2->protocol)
		return false;

	if (!choke_skb_cb(skb1)->keys_valid) {
		choke_skb_cb(skb1)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb1, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
	}

	if (!choke_skb_cb(skb2)->keys_valid) {
		choke_skb_cb(skb2)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb2, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
	}

	return !memcmp(&choke_skb_cb(skb1)->keys,
		       &choke_skb_cb(skb2)->keys,
		       sizeof(choke_skb_cb(skb1)->keys));
}
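
/*
 * Select a packet at random from queue
 * HACK: since queue can have holes from previous deletion; retry several
 *   times to find a random skb but then just give up and return the head
 * Will return NULL if queue is empty (q->head == q->tail)
 */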
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
					 unsigned int *pidx)
{
	struct sk_buff *skb;
	int retrys = 3;

	do {
		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
		skb = q->tab[*pidx];
		if (skb)
			return skb;
	} while (--retrys > 0);

	return q->tab[*pidx = q->head];
}
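
/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */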
static bool choke_match_random(const struct choke_sched_data *q,
			       struct sk_buff *nskb,
			       unsigned int *pidx)
{
	struct sk_buff *oskb;

	if (q->head == q->tail)
		return false;

	oskb = choke_peek_random(q, pidx);
	return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	const struct red_parms *p = &q->parms;

	choke_skb_cb(skb)->keys_valid = 0;
	/* Compute average queue usage (see RED) */
	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	/* Is queue small? */
	if (q->vars.qavg <= p->qth_min)
		q->vars.qcount = -1;
	else {
		unsigned int idx;

		/* Draw a packet at random from queue and compare flow */
		if (choke_match_random(q, skb, &idx)) {
			q->stats.matched++;
			choke_drop_by_idx(sch, idx, to_free);
			goto congestion_drop;
		}

		/* Queue is large, always mark/drop */
		if (q->vars.qavg > p->qth_max) {
			q->vars.qcount = -1;

			qdisc_qstats_overlimit(sch);
			if (use_harddrop(q) || !use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
		} else if (++q->vars.qcount) {
			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
				q->vars.qcount = 0;
				q->vars.qR = red_random(p);

				qdisc_qstats_overlimit(sch);
				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
					q->stats.prob_drop++;
					goto congestion_drop;
				}

				q->stats.prob_mark++;
			}
		} else
			q->vars.qR = red_random(p);
	}

	/* Admit new packet */
	if (sch->q.qlen < q->limit) {
		q->tab[q->tail] = skb;
		q->tail = (q->tail + 1) & q->tab_mask;
		++sch->q.qlen;
		qdisc_qstats_backlog_inc(sch, skb);
		return NET_XMIT_SUCCESS;
	}

	q->stats.pdrop++;
	return qdisc_drop(skb, sch, to_free);

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (q->head == q->tail) {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
		return NULL;
	}

	skb = q->tab[q->head];
	q->tab[q->head] = NULL;
	choke_zap_head_holes(q);
	--sch->q.qlen;
	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_bstats_update(sch, skb);

	return skb;
}

static void choke_reset(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	while (q->head != q->tail) {
		struct sk_buff *skb = q->tab[q->head];

		q->head = (q->head + 1) & q->tab_mask;
		if (!skb)
			continue;
		rtnl_qdisc_drop(skb, sch);
	}

	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	if (q->tab)
		memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
	q->head = q->tail = 0;
	red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
	kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CHOKE_MAX + 1];
	const struct tc_red_qopt *ctl;
	int err;
	struct sk_buff **old = NULL;
	unsigned int mask;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
					  choke_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_CHOKE_PARMS] == NULL ||
	    tb[TCA_CHOKE_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_CHOKE_PARMS]);

	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;

	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
	if (mask != q->tab_mask) {
		struct sk_buff **ntab;

		ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
		if (!ntab)
			return -ENOMEM;

		sch_tree_lock(sch);
		old = q->tab;
		if (old) {
			unsigned int oqlen = sch->q.qlen, tail = 0;
			unsigned int dropped = 0;

			while (q->head != q->tail) {
				struct sk_buff *skb = q->tab[q->head];

				q->head = (q->head + 1) & q->tab_mask;
				if (!skb)
					continue;
				if (tail < mask) {
					ntab[tail++] = skb;
					continue;
				}
				dropped += qdisc_pkt_len(skb);
				qdisc_qstats_backlog_dec(sch, skb);
				--sch->q.qlen;
				rtnl_qdisc_drop(skb, sch);
			}
			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
			q->head = 0;
			q->tail = tail;
		}

		q->tab_mask = mask;
		q->tab = ntab;
	} else
		sch_tree_lock(sch);

	q->flags = ctl->flags;
	q->limit = ctl->limit;

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_CHOKE_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	if (q->head == q->tail)
		red_end_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	choke_free(old);
	return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	return choke_change(sch, opt, extack);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tc_choke_xstats st = {
		.early	 = q->stats.prob_drop + q->stats.forced_drop,
		.marked	 = q->stats.prob_mark + q->stats.forced_mark,
		.pdrop	 = q->stats.pdrop,
		.other	 = q->stats.other,
		.matched = q->stats.matched,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		=	"choke",
	.priv_size	=	sizeof(struct choke_sched_data),

	.enqueue	=	choke_enqueue,
	.dequeue	=	choke_dequeue,
	.peek		=	choke_peek_head,
	.init		=	choke_init,
	.destroy	=	choke_destroy,
	.reset		=	choke_reset,
	.change		=	choke_change,
	.dump		=	choke_dump,
	.dump_stats	=	choke_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init choke_module_init(void)
{
	return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");