1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/module.h>
18#include <linux/types.h>
19#include <linux/kernel.h>
20#include <linux/skbuff.h>
21#include <net/pkt_sched.h>
22#include <net/pkt_cls.h>
23#include <net/inet_ecn.h>
24#include <net/red.h>
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40struct red_sched_data {
41 u32 limit;
42 unsigned char flags;
43 struct timer_list adapt_timer;
44 struct Qdisc *sch;
45 struct red_parms parms;
46 struct red_vars vars;
47 struct red_stats stats;
48 struct Qdisc *qdisc;
49};
50
51static inline int red_use_ecn(struct red_sched_data *q)
52{
53 return q->flags & TC_RED_ECN;
54}
55
56static inline int red_use_harddrop(struct red_sched_data *q)
57{
58 return q->flags & TC_RED_HARDDROP;
59}
60
61static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
62 struct sk_buff **to_free)
63{
64 struct red_sched_data *q = qdisc_priv(sch);
65 struct Qdisc *child = q->qdisc;
66 int ret;
67
68 q->vars.qavg = red_calc_qavg(&q->parms,
69 &q->vars,
70 child->qstats.backlog);
71
72 if (red_is_idling(&q->vars))
73 red_end_of_idle_period(&q->vars);
74
75 switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
76 case RED_DONT_MARK:
77 break;
78
79 case RED_PROB_MARK:
80 qdisc_qstats_overlimit(sch);
81 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
82 q->stats.prob_drop++;
83 goto congestion_drop;
84 }
85
86 q->stats.prob_mark++;
87 break;
88
89 case RED_HARD_MARK:
90 qdisc_qstats_overlimit(sch);
91 if (red_use_harddrop(q) || !red_use_ecn(q) ||
92 !INET_ECN_set_ce(skb)) {
93 q->stats.forced_drop++;
94 goto congestion_drop;
95 }
96
97 q->stats.forced_mark++;
98 break;
99 }
100
101 ret = qdisc_enqueue(skb, child, to_free);
102 if (likely(ret == NET_XMIT_SUCCESS)) {
103 qdisc_qstats_backlog_inc(sch, skb);
104 sch->q.qlen++;
105 } else if (net_xmit_drop_count(ret)) {
106 q->stats.pdrop++;
107 qdisc_qstats_drop(sch);
108 }
109 return ret;
110
111congestion_drop:
112 qdisc_drop(skb, sch, to_free);
113 return NET_XMIT_CN;
114}
115
116static struct sk_buff *red_dequeue(struct Qdisc *sch)
117{
118 struct sk_buff *skb;
119 struct red_sched_data *q = qdisc_priv(sch);
120 struct Qdisc *child = q->qdisc;
121
122 skb = child->dequeue(child);
123 if (skb) {
124 qdisc_bstats_update(sch, skb);
125 qdisc_qstats_backlog_dec(sch, skb);
126 sch->q.qlen--;
127 } else {
128 if (!red_is_idling(&q->vars))
129 red_start_of_idle_period(&q->vars);
130 }
131 return skb;
132}
133
134static struct sk_buff *red_peek(struct Qdisc *sch)
135{
136 struct red_sched_data *q = qdisc_priv(sch);
137 struct Qdisc *child = q->qdisc;
138
139 return child->ops->peek(child);
140}
141
142static void red_reset(struct Qdisc *sch)
143{
144 struct red_sched_data *q = qdisc_priv(sch);
145
146 qdisc_reset(q->qdisc);
147 sch->qstats.backlog = 0;
148 sch->q.qlen = 0;
149 red_restart(&q->vars);
150}
151
152static int red_offload(struct Qdisc *sch, bool enable)
153{
154 struct red_sched_data *q = qdisc_priv(sch);
155 struct net_device *dev = qdisc_dev(sch);
156 struct tc_red_qopt_offload opt = {
157 .handle = sch->handle,
158 .parent = sch->parent,
159 };
160
161 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 return -EOPNOTSUPP;
163
164 if (enable) {
165 opt.command = TC_RED_REPLACE;
166 opt.set.min = q->parms.qth_min >> q->parms.Wlog;
167 opt.set.max = q->parms.qth_max >> q->parms.Wlog;
168 opt.set.probability = q->parms.max_P;
169 opt.set.is_ecn = red_use_ecn(q);
170 opt.set.qstats = &sch->qstats;
171 } else {
172 opt.command = TC_RED_DESTROY;
173 }
174
175 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
176}
177
178static void red_destroy(struct Qdisc *sch)
179{
180 struct red_sched_data *q = qdisc_priv(sch);
181
182 del_timer_sync(&q->adapt_timer);
183 red_offload(sch, false);
184 qdisc_destroy(q->qdisc);
185}
186
187static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
188 [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
189 [TCA_RED_STAB] = { .len = RED_STAB_SIZE },
190 [TCA_RED_MAX_P] = { .type = NLA_U32 },
191};
192
193static int red_change(struct Qdisc *sch, struct nlattr *opt,
194 struct netlink_ext_ack *extack)
195{
196 struct red_sched_data *q = qdisc_priv(sch);
197 struct nlattr *tb[TCA_RED_MAX + 1];
198 struct tc_red_qopt *ctl;
199 struct Qdisc *child = NULL;
200 int err;
201 u32 max_P;
202
203 if (opt == NULL)
204 return -EINVAL;
205
206 err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
207 if (err < 0)
208 return err;
209
210 if (tb[TCA_RED_PARMS] == NULL ||
211 tb[TCA_RED_STAB] == NULL)
212 return -EINVAL;
213
214 max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
215
216 ctl = nla_data(tb[TCA_RED_PARMS]);
217 if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
218 return -EINVAL;
219
220 if (ctl->limit > 0) {
221 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
222 extack);
223 if (IS_ERR(child))
224 return PTR_ERR(child);
225
226
227 qdisc_hash_add(child, true);
228 }
229
230 sch_tree_lock(sch);
231 q->flags = ctl->flags;
232 q->limit = ctl->limit;
233 if (child) {
234 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
235 q->qdisc->qstats.backlog);
236 qdisc_destroy(q->qdisc);
237 q->qdisc = child;
238 }
239
240 red_set_parms(&q->parms,
241 ctl->qth_min, ctl->qth_max, ctl->Wlog,
242 ctl->Plog, ctl->Scell_log,
243 nla_data(tb[TCA_RED_STAB]),
244 max_P);
245 red_set_vars(&q->vars);
246
247 del_timer(&q->adapt_timer);
248 if (ctl->flags & TC_RED_ADAPTATIVE)
249 mod_timer(&q->adapt_timer, jiffies + HZ/2);
250
251 if (!q->qdisc->q.qlen)
252 red_start_of_idle_period(&q->vars);
253
254 sch_tree_unlock(sch);
255 red_offload(sch, true);
256 return 0;
257}
258
259static inline void red_adaptative_timer(struct timer_list *t)
260{
261 struct red_sched_data *q = from_timer(q, t, adapt_timer);
262 struct Qdisc *sch = q->sch;
263 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
264
265 spin_lock(root_lock);
266 red_adaptative_algo(&q->parms, &q->vars);
267 mod_timer(&q->adapt_timer, jiffies + HZ/2);
268 spin_unlock(root_lock);
269}
270
271static int red_init(struct Qdisc *sch, struct nlattr *opt,
272 struct netlink_ext_ack *extack)
273{
274 struct red_sched_data *q = qdisc_priv(sch);
275
276 q->qdisc = &noop_qdisc;
277 q->sch = sch;
278 timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
279 return red_change(sch, opt, extack);
280}
281
282static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
283{
284 struct net_device *dev = qdisc_dev(sch);
285 struct tc_red_qopt_offload hw_stats = {
286 .command = TC_RED_STATS,
287 .handle = sch->handle,
288 .parent = sch->parent,
289 {
290 .stats.bstats = &sch->bstats,
291 .stats.qstats = &sch->qstats,
292 },
293 };
294 int err;
295
296 sch->flags &= ~TCQ_F_OFFLOADED;
297
298 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
299 return 0;
300
301 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
302 &hw_stats);
303 if (err == -EOPNOTSUPP)
304 return 0;
305
306 if (!err)
307 sch->flags |= TCQ_F_OFFLOADED;
308
309 return err;
310}
311
312static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
313{
314 struct red_sched_data *q = qdisc_priv(sch);
315 struct nlattr *opts = NULL;
316 struct tc_red_qopt opt = {
317 .limit = q->limit,
318 .flags = q->flags,
319 .qth_min = q->parms.qth_min >> q->parms.Wlog,
320 .qth_max = q->parms.qth_max >> q->parms.Wlog,
321 .Wlog = q->parms.Wlog,
322 .Plog = q->parms.Plog,
323 .Scell_log = q->parms.Scell_log,
324 };
325 int err;
326
327 err = red_dump_offload_stats(sch, &opt);
328 if (err)
329 goto nla_put_failure;
330
331 opts = nla_nest_start(skb, TCA_OPTIONS);
332 if (opts == NULL)
333 goto nla_put_failure;
334 if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
335 nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
336 goto nla_put_failure;
337 return nla_nest_end(skb, opts);
338
339nla_put_failure:
340 nla_nest_cancel(skb, opts);
341 return -EMSGSIZE;
342}
343
344static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
345{
346 struct red_sched_data *q = qdisc_priv(sch);
347 struct net_device *dev = qdisc_dev(sch);
348 struct tc_red_xstats st = {0};
349
350 if (sch->flags & TCQ_F_OFFLOADED) {
351 struct tc_red_qopt_offload hw_stats_request = {
352 .command = TC_RED_XSTATS,
353 .handle = sch->handle,
354 .parent = sch->parent,
355 {
356 .xstats = &q->stats,
357 },
358 };
359 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
360 &hw_stats_request);
361 }
362 st.early = q->stats.prob_drop + q->stats.forced_drop;
363 st.pdrop = q->stats.pdrop;
364 st.other = q->stats.other;
365 st.marked = q->stats.prob_mark + q->stats.forced_mark;
366
367 return gnet_stats_copy_app(d, &st, sizeof(st));
368}
369
370static int red_dump_class(struct Qdisc *sch, unsigned long cl,
371 struct sk_buff *skb, struct tcmsg *tcm)
372{
373 struct red_sched_data *q = qdisc_priv(sch);
374
375 tcm->tcm_handle |= TC_H_MIN(1);
376 tcm->tcm_info = q->qdisc->handle;
377 return 0;
378}
379
380static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
381 struct Qdisc **old, struct netlink_ext_ack *extack)
382{
383 struct red_sched_data *q = qdisc_priv(sch);
384
385 if (new == NULL)
386 new = &noop_qdisc;
387
388 *old = qdisc_replace(sch, new, &q->qdisc);
389 return 0;
390}
391
392static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
393{
394 struct red_sched_data *q = qdisc_priv(sch);
395 return q->qdisc;
396}
397
398static unsigned long red_find(struct Qdisc *sch, u32 classid)
399{
400 return 1;
401}
402
403static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
404{
405 if (!walker->stop) {
406 if (walker->count >= walker->skip)
407 if (walker->fn(sch, 1, walker) < 0) {
408 walker->stop = 1;
409 return;
410 }
411 walker->count++;
412 }
413}
414
415static const struct Qdisc_class_ops red_class_ops = {
416 .graft = red_graft,
417 .leaf = red_leaf,
418 .find = red_find,
419 .walk = red_walk,
420 .dump = red_dump_class,
421};
422
423static struct Qdisc_ops red_qdisc_ops __read_mostly = {
424 .id = "red",
425 .priv_size = sizeof(struct red_sched_data),
426 .cl_ops = &red_class_ops,
427 .enqueue = red_enqueue,
428 .dequeue = red_dequeue,
429 .peek = red_peek,
430 .init = red_init,
431 .reset = red_reset,
432 .destroy = red_destroy,
433 .change = red_change,
434 .dump = red_dump,
435 .dump_stats = red_dump_stats,
436 .owner = THIS_MODULE,
437};
438
439static int __init red_module_init(void)
440{
441 return register_qdisc(&red_qdisc_ops);
442}
443
444static void __exit red_module_exit(void)
445{
446 unregister_qdisc(&red_qdisc_ops);
447}
448
449module_init(red_module_init)
450module_exit(red_module_exit)
451
452MODULE_LICENSE("GPL");
453