1
2
3
4
5
6
7
8
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/skbuff.h>
13#include <linux/vmalloc.h>
14#include <net/pkt_sched.h>
15#include <net/pkt_cls.h>
16#include <net/inet_ecn.h>
17#include <net/red.h>
18#include <net/flow_dissector.h>
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46#define CHOKE_MAX_QUEUE (128*1024 - 1)
47
48struct choke_sched_data {
49
50 u32 limit;
51 unsigned char flags;
52
53 struct red_parms parms;
54
55
56 struct red_vars vars;
57 struct {
58 u32 prob_drop;
59 u32 prob_mark;
60 u32 forced_drop;
61 u32 forced_mark;
62 u32 pdrop;
63 u32 other;
64 u32 matched;
65 } stats;
66
67 unsigned int head;
68 unsigned int tail;
69
70 unsigned int tab_mask;
71
72 struct sk_buff **tab;
73};
74
75
76static unsigned int choke_len(const struct choke_sched_data *q)
77{
78 return (q->tail - q->head) & q->tab_mask;
79}
80
81
82static int use_ecn(const struct choke_sched_data *q)
83{
84 return q->flags & TC_RED_ECN;
85}
86
87
88static int use_harddrop(const struct choke_sched_data *q)
89{
90 return q->flags & TC_RED_HARDDROP;
91}
92
93
94static void choke_zap_head_holes(struct choke_sched_data *q)
95{
96 do {
97 q->head = (q->head + 1) & q->tab_mask;
98 if (q->head == q->tail)
99 break;
100 } while (q->tab[q->head] == NULL);
101}
102
103
104static void choke_zap_tail_holes(struct choke_sched_data *q)
105{
106 do {
107 q->tail = (q->tail - 1) & q->tab_mask;
108 if (q->head == q->tail)
109 break;
110 } while (q->tab[q->tail] == NULL);
111}
112
113
114static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
115 struct sk_buff **to_free)
116{
117 struct choke_sched_data *q = qdisc_priv(sch);
118 struct sk_buff *skb = q->tab[idx];
119
120 q->tab[idx] = NULL;
121
122 if (idx == q->head)
123 choke_zap_head_holes(q);
124 if (idx == q->tail)
125 choke_zap_tail_holes(q);
126
127 qdisc_qstats_backlog_dec(sch, skb);
128 qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
129 qdisc_drop(skb, sch, to_free);
130 --sch->q.qlen;
131}
132
133struct choke_skb_cb {
134 u16 classid;
135 u8 keys_valid;
136 struct flow_keys_digest keys;
137};
138
139static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
140{
141 qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
142 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
143}
144
145static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
146{
147 choke_skb_cb(skb)->classid = classid;
148}
149
150
151
152
153
154
155static bool choke_match_flow(struct sk_buff *skb1,
156 struct sk_buff *skb2)
157{
158 struct flow_keys temp;
159
160 if (skb1->protocol != skb2->protocol)
161 return false;
162
163 if (!choke_skb_cb(skb1)->keys_valid) {
164 choke_skb_cb(skb1)->keys_valid = 1;
165 skb_flow_dissect_flow_keys(skb1, &temp, 0);
166 make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
167 }
168
169 if (!choke_skb_cb(skb2)->keys_valid) {
170 choke_skb_cb(skb2)->keys_valid = 1;
171 skb_flow_dissect_flow_keys(skb2, &temp, 0);
172 make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
173 }
174
175 return !memcmp(&choke_skb_cb(skb1)->keys,
176 &choke_skb_cb(skb2)->keys,
177 sizeof(choke_skb_cb(skb1)->keys));
178}
179
180
181
182
183
184
185
186static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
187 unsigned int *pidx)
188{
189 struct sk_buff *skb;
190 int retrys = 3;
191
192 do {
193 *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
194 skb = q->tab[*pidx];
195 if (skb)
196 return skb;
197 } while (--retrys > 0);
198
199 return q->tab[*pidx = q->head];
200}
201
202
203
204
205
206static bool choke_match_random(const struct choke_sched_data *q,
207 struct sk_buff *nskb,
208 unsigned int *pidx)
209{
210 struct sk_buff *oskb;
211
212 if (q->head == q->tail)
213 return false;
214
215 oskb = choke_peek_random(q, pidx);
216 return choke_match_flow(oskb, nskb);
217}
218
219static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
220 struct sk_buff **to_free)
221{
222 struct choke_sched_data *q = qdisc_priv(sch);
223 const struct red_parms *p = &q->parms;
224
225 choke_skb_cb(skb)->keys_valid = 0;
226
227 q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
228 if (red_is_idling(&q->vars))
229 red_end_of_idle_period(&q->vars);
230
231
232 if (q->vars.qavg <= p->qth_min)
233 q->vars.qcount = -1;
234 else {
235 unsigned int idx;
236
237
238 if (choke_match_random(q, skb, &idx)) {
239 q->stats.matched++;
240 choke_drop_by_idx(sch, idx, to_free);
241 goto congestion_drop;
242 }
243
244
245 if (q->vars.qavg > p->qth_max) {
246 q->vars.qcount = -1;
247
248 qdisc_qstats_overlimit(sch);
249 if (use_harddrop(q) || !use_ecn(q) ||
250 !INET_ECN_set_ce(skb)) {
251 q->stats.forced_drop++;
252 goto congestion_drop;
253 }
254
255 q->stats.forced_mark++;
256 } else if (++q->vars.qcount) {
257 if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
258 q->vars.qcount = 0;
259 q->vars.qR = red_random(p);
260
261 qdisc_qstats_overlimit(sch);
262 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
263 q->stats.prob_drop++;
264 goto congestion_drop;
265 }
266
267 q->stats.prob_mark++;
268 }
269 } else
270 q->vars.qR = red_random(p);
271 }
272
273
274 if (sch->q.qlen < q->limit) {
275 q->tab[q->tail] = skb;
276 q->tail = (q->tail + 1) & q->tab_mask;
277 ++sch->q.qlen;
278 qdisc_qstats_backlog_inc(sch, skb);
279 return NET_XMIT_SUCCESS;
280 }
281
282 q->stats.pdrop++;
283 return qdisc_drop(skb, sch, to_free);
284
285congestion_drop:
286 qdisc_drop(skb, sch, to_free);
287 return NET_XMIT_CN;
288}
289
290static struct sk_buff *choke_dequeue(struct Qdisc *sch)
291{
292 struct choke_sched_data *q = qdisc_priv(sch);
293 struct sk_buff *skb;
294
295 if (q->head == q->tail) {
296 if (!red_is_idling(&q->vars))
297 red_start_of_idle_period(&q->vars);
298 return NULL;
299 }
300
301 skb = q->tab[q->head];
302 q->tab[q->head] = NULL;
303 choke_zap_head_holes(q);
304 --sch->q.qlen;
305 qdisc_qstats_backlog_dec(sch, skb);
306 qdisc_bstats_update(sch, skb);
307
308 return skb;
309}
310
311static void choke_reset(struct Qdisc *sch)
312{
313 struct choke_sched_data *q = qdisc_priv(sch);
314
315 while (q->head != q->tail) {
316 struct sk_buff *skb = q->tab[q->head];
317
318 q->head = (q->head + 1) & q->tab_mask;
319 if (!skb)
320 continue;
321 rtnl_qdisc_drop(skb, sch);
322 }
323
324 sch->q.qlen = 0;
325 sch->qstats.backlog = 0;
326 memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
327 q->head = q->tail = 0;
328 red_restart(&q->vars);
329}
330
331static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
332 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
333 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
334 [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
335};
336
337
338static void choke_free(void *addr)
339{
340 kvfree(addr);
341}
342
343static int choke_change(struct Qdisc *sch, struct nlattr *opt,
344 struct netlink_ext_ack *extack)
345{
346 struct choke_sched_data *q = qdisc_priv(sch);
347 struct nlattr *tb[TCA_CHOKE_MAX + 1];
348 const struct tc_red_qopt *ctl;
349 int err;
350 struct sk_buff **old = NULL;
351 unsigned int mask;
352 u32 max_P;
353
354 if (opt == NULL)
355 return -EINVAL;
356
357 err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
358 choke_policy, NULL);
359 if (err < 0)
360 return err;
361
362 if (tb[TCA_CHOKE_PARMS] == NULL ||
363 tb[TCA_CHOKE_STAB] == NULL)
364 return -EINVAL;
365
366 max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
367
368 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
369
370 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
371 return -EINVAL;
372
373 if (ctl->limit > CHOKE_MAX_QUEUE)
374 return -EINVAL;
375
376 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
377 if (mask != q->tab_mask) {
378 struct sk_buff **ntab;
379
380 ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO);
381 if (!ntab)
382 return -ENOMEM;
383
384 sch_tree_lock(sch);
385 old = q->tab;
386 if (old) {
387 unsigned int oqlen = sch->q.qlen, tail = 0;
388 unsigned dropped = 0;
389
390 while (q->head != q->tail) {
391 struct sk_buff *skb = q->tab[q->head];
392
393 q->head = (q->head + 1) & q->tab_mask;
394 if (!skb)
395 continue;
396 if (tail < mask) {
397 ntab[tail++] = skb;
398 continue;
399 }
400 dropped += qdisc_pkt_len(skb);
401 qdisc_qstats_backlog_dec(sch, skb);
402 --sch->q.qlen;
403 rtnl_qdisc_drop(skb, sch);
404 }
405 qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
406 q->head = 0;
407 q->tail = tail;
408 }
409
410 q->tab_mask = mask;
411 q->tab = ntab;
412 } else
413 sch_tree_lock(sch);
414
415 q->flags = ctl->flags;
416 q->limit = ctl->limit;
417
418 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
419 ctl->Plog, ctl->Scell_log,
420 nla_data(tb[TCA_CHOKE_STAB]),
421 max_P);
422 red_set_vars(&q->vars);
423
424 if (q->head == q->tail)
425 red_end_of_idle_period(&q->vars);
426
427 sch_tree_unlock(sch);
428 choke_free(old);
429 return 0;
430}
431
432static int choke_init(struct Qdisc *sch, struct nlattr *opt,
433 struct netlink_ext_ack *extack)
434{
435 return choke_change(sch, opt, extack);
436}
437
438static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
439{
440 struct choke_sched_data *q = qdisc_priv(sch);
441 struct nlattr *opts = NULL;
442 struct tc_red_qopt opt = {
443 .limit = q->limit,
444 .flags = q->flags,
445 .qth_min = q->parms.qth_min >> q->parms.Wlog,
446 .qth_max = q->parms.qth_max >> q->parms.Wlog,
447 .Wlog = q->parms.Wlog,
448 .Plog = q->parms.Plog,
449 .Scell_log = q->parms.Scell_log,
450 };
451
452 opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
453 if (opts == NULL)
454 goto nla_put_failure;
455
456 if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
457 nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
458 goto nla_put_failure;
459 return nla_nest_end(skb, opts);
460
461nla_put_failure:
462 nla_nest_cancel(skb, opts);
463 return -EMSGSIZE;
464}
465
466static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
467{
468 struct choke_sched_data *q = qdisc_priv(sch);
469 struct tc_choke_xstats st = {
470 .early = q->stats.prob_drop + q->stats.forced_drop,
471 .marked = q->stats.prob_mark + q->stats.forced_mark,
472 .pdrop = q->stats.pdrop,
473 .other = q->stats.other,
474 .matched = q->stats.matched,
475 };
476
477 return gnet_stats_copy_app(d, &st, sizeof(st));
478}
479
480static void choke_destroy(struct Qdisc *sch)
481{
482 struct choke_sched_data *q = qdisc_priv(sch);
483
484 choke_free(q->tab);
485}
486
487static struct sk_buff *choke_peek_head(struct Qdisc *sch)
488{
489 struct choke_sched_data *q = qdisc_priv(sch);
490
491 return (q->head != q->tail) ? q->tab[q->head] : NULL;
492}
493
494static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
495 .id = "choke",
496 .priv_size = sizeof(struct choke_sched_data),
497
498 .enqueue = choke_enqueue,
499 .dequeue = choke_dequeue,
500 .peek = choke_peek_head,
501 .init = choke_init,
502 .destroy = choke_destroy,
503 .reset = choke_reset,
504 .change = choke_change,
505 .dump = choke_dump,
506 .dump_stats = choke_dump_stats,
507 .owner = THIS_MODULE,
508};
509
510static int __init choke_module_init(void)
511{
512 return register_qdisc(&choke_qdisc_ops);
513}
514
515static void __exit choke_module_exit(void)
516{
517 unregister_qdisc(&choke_qdisc_ops);
518}
519
520module_init(choke_module_init)
521module_exit(choke_module_exit)
522
523MODULE_LICENSE("GPL");
524