// SPDX-License-Identifier: GPL-2.0-only
/* net/sched/sch_choke.c	CHOKE scheduler */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows
   but maintains no per-flow state. The difference from RED is an additional
   step during enqueue: if the average queue size is above the minimum
   threshold (qth_min), a packet is chosen at random from the queue. If the
   new packet and the chosen packet belong to the same flow, both are
   dropped.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe - A Stateless Active Queue
   Management Scheme for Approximating Fair Bandwidth Allocation",
   Proceedings of IEEE INFOCOM, 2000.
 */
/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE	(128*1024 - 1)

struct choke_sched_data {
/* parameters */
	u32		 limit;
	unsigned char	 flags;

	struct red_parms parms;

/* variables */
	struct red_vars	 vars;
	struct {
		u32	prob_drop;	/* Early probability drops */
		u32	prob_mark;	/* Early probability marks */
		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
		u32	pdrop;		/* Drops due to queue limits */
		u32	other;		/* Drops due to drop() calls */
		u32	matched;	/* Drops due to flow match */
	} stats;

	unsigned int	 head;
	unsigned int	 tail;

	unsigned int	 tab_mask; /* size - 1 */

	struct sk_buff **tab;
};
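/* number of elements in queue including holes */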
static unsigned int choke_len(const struct choke_sched_data *q)
{
	return (q->tail - q->head) & q->tab_mask;
}
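/* Is ECN parameter configured */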
static int use_ecn(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}
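/* Should packets over the max threshold just be dropped (versus marked) */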
static int use_harddrop(const struct choke_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}
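/* Move head pointer forward to skip over holes */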
static void choke_zap_head_holes(struct choke_sched_data *q)
{
	do {
		q->head = (q->head + 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->head] == NULL);
}
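/* Move tail pointer backwards to reuse holes */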
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
	do {
		q->tail = (q->tail - 1) & q->tab_mask;
		if (q->head == q->tail)
			break;
	} while (q->tab[q->tail] == NULL);
}
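/* Drop packet from queue array by creating a "hole" */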
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
			      struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = q->tab[idx];

	q->tab[idx] = NULL;

	if (idx == q->head)
		choke_zap_head_holes(q);
	if (idx == q->tail)
		choke_zap_tail_holes(q);

	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
	qdisc_drop(skb, sch, to_free);
	--sch->q.qlen;
}

struct choke_skb_cb {
	u8			keys_valid;
	struct flow_keys_digest	keys;
};

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}
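/*
 * Compare the flows of two packets.
 * Dissects each packet and caches a flow_keys digest in the skb cb on
 * first use; returns true if both packets belong to the same flow.
 */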
static bool choke_match_flow(struct sk_buff *skb1,
			     struct sk_buff *skb2)
{
	struct flow_keys temp;

	if (skb1->protocol != skb2->protocol)
		return false;

	if (!choke_skb_cb(skb1)->keys_valid) {
		choke_skb_cb(skb1)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb1, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
	}

	if (!choke_skb_cb(skb2)->keys_valid) {
		choke_skb_cb(skb2)->keys_valid = 1;
		skb_flow_dissect_flow_keys(skb2, &temp, 0);
		make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
	}

	return !memcmp(&choke_skb_cb(skb1)->keys,
		       &choke_skb_cb(skb2)->keys,
		       sizeof(choke_skb_cb(skb1)->keys));
}
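/*
 * Select a packet at random from the queue.
 * Since the queue can contain holes from previous deletions, retry a few
 * times to find a packet, then fall back to the slot at the head.
 */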
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
					 unsigned int *pidx)
{
	struct sk_buff *skb;
	int retrys = 3;

	do {
		*pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
		skb = q->tab[*pidx];
		if (skb)
			return skb;
	} while (--retrys > 0);

	return q->tab[*pidx = q->head];
}
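/*
 * Compare new packet with a random packet already in the queue.
 * Returns true on a flow match and sets *pidx to the matched slot.
 */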
static bool choke_match_random(const struct choke_sched_data *q,
			       struct sk_buff *nskb,
			       unsigned int *pidx)
{
	struct sk_buff *oskb;

	if (q->head == q->tail)
		return false;

	oskb = choke_peek_random(q, pidx);
	return choke_match_flow(oskb, nskb);
}
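/*
 * Enqueue: standard RED marking/dropping, plus the CHOKe step of comparing
 * the arriving packet with one drawn at random from the queue and dropping
 * both on a flow match.
 */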
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	const struct red_parms *p = &q->parms;

	choke_skb_cb(skb)->keys_valid = 0;
	/* Compute average queue usage (see RED) */
	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	/* Is queue small? */
	if (q->vars.qavg <= p->qth_min)
		q->vars.qcount = -1;
	else {
		unsigned int idx;

		/* Draw a packet at random from queue and compare flow */
		if (choke_match_random(q, skb, &idx)) {
			q->stats.matched++;
			choke_drop_by_idx(sch, idx, to_free);
			goto congestion_drop;
		}

		/* Queue is large, always mark/drop */
		if (q->vars.qavg > p->qth_max) {
			q->vars.qcount = -1;

			qdisc_qstats_overlimit(sch);
			if (use_harddrop(q) || !use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
		} else if (++q->vars.qcount) {
			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
				q->vars.qcount = 0;
				q->vars.qR = red_random(p);

				qdisc_qstats_overlimit(sch);
				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
					q->stats.prob_drop++;
					goto congestion_drop;
				}

				q->stats.prob_mark++;
			}
		} else
			q->vars.qR = red_random(p);
	}

	/* Admit new packet */
	if (sch->q.qlen < q->limit) {
		q->tab[q->tail] = skb;
		q->tail = (q->tail + 1) & q->tab_mask;
		++sch->q.qlen;
		qdisc_qstats_backlog_inc(sch, skb);
		return NET_XMIT_SUCCESS;
	}

	q->stats.pdrop++;
	return qdisc_drop(skb, sch, to_free);

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (q->head == q->tail) {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
		return NULL;
	}

	skb = q->tab[q->head];
	q->tab[q->head] = NULL;
	choke_zap_head_holes(q);
	--sch->q.qlen;
	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_bstats_update(sch, skb);

	return skb;
}

static void choke_reset(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	while (q->head != q->tail) {
		struct sk_buff *skb = q->tab[q->head];

		q->head = (q->head + 1) & q->tab_mask;
		if (!skb)
			continue;
		rtnl_qdisc_drop(skb, sch);
	}

	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	if (q->tab)
		memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
	q->head = q->tail = 0;
	red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
};

static void choke_free(void *addr)
{
	kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_CHOKE_MAX + 1];
	const struct tc_red_qopt *ctl;
	int err;
	struct sk_buff **old = NULL;
	unsigned int mask;
	u32 max_P;
	u8 *stab;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
					  choke_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_CHOKE_PARMS] == NULL ||
	    tb[TCA_CHOKE_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
	stab = nla_data(tb[TCA_CHOKE_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	if (ctl->limit > CHOKE_MAX_QUEUE)
		return -EINVAL;

	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
	if (mask != q->tab_mask) {
		struct sk_buff **ntab;

		ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
		if (!ntab)
			return -ENOMEM;

		sch_tree_lock(sch);
		old = q->tab;
		if (old) {
			unsigned int oqlen = sch->q.qlen, tail = 0;
			unsigned int dropped = 0;

			while (q->head != q->tail) {
				struct sk_buff *skb = q->tab[q->head];

				q->head = (q->head + 1) & q->tab_mask;
				if (!skb)
					continue;
				if (tail < mask) {
					ntab[tail++] = skb;
					continue;
				}
				dropped += qdisc_pkt_len(skb);
				qdisc_qstats_backlog_dec(sch, skb);
				--sch->q.qlen;
				rtnl_qdisc_drop(skb, sch);
			}
			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
			q->head = 0;
			q->tail = tail;
		}

		q->tab_mask = mask;
		q->tab = ntab;
	} else
		sch_tree_lock(sch);

	q->flags = ctl->flags;
	q->limit = ctl->limit;

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	if (q->head == q->tail)
		red_end_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	choke_free(old);
	return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	return choke_change(sch, opt, extack);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct choke_sched_data *q = qdisc_priv(sch);
	struct tc_choke_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.matched = q->stats.matched,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	choke_free(q->tab);
}

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
	struct choke_sched_data *q = qdisc_priv(sch);

	return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
	.id		=	"choke",
	.priv_size	=	sizeof(struct choke_sched_data),

	.enqueue	=	choke_enqueue,
	.dequeue	=	choke_dequeue,
	.peek		=	choke_peek_head,
	.init		=	choke_init,
	.destroy	=	choke_destroy,
	.reset		=	choke_reset,
	.change		=	choke_change,
	.dump		=	choke_dump,
	.dump_stats	=	choke_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init choke_module_init(void)
{
	return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
	unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");