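/* sch_tbf - Token Bucket Filter ("tbf") queueing discipline. */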
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
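/* Token Bucket Filter: shape traffic on a device to a configured rate.
 *
 * Tokens are accounted in nanoseconds of transmission time.  They accrue
 * while the qdisc is idle, up to a depth of q->buffer ns for the main
 * bucket and q->mtu ns for the optional peak-rate bucket.  A packet may
 * be dequeued only when both buckets hold enough tokens to cover its
 * length at q->rate (and q->peak, if set); otherwise the watchdog defers
 * dequeueing until enough tokens have accumulated.
 *
 * Fields of tbf_sched_data:
 *   limit     - backlog limit (bytes) handed to the inner bfifo child
 *   max_size  - largest packet that can ever conform (bytes)
 *   buffer    - main bucket depth, in ns at "rate"
 *   mtu       - peak bucket depth, in ns at "peak"
 *   tokens, ptokens - current bucket fill levels (ns)
 *   t_c       - time of the last token update
 *   qdisc     - inner child qdisc (a bfifo by default)
 *   watchdog  - timer that wakes the qdisc when tokens become available
 *
 * Typical userspace configuration (illustrative example only):
 *   tc qdisc add dev eth0 root tbf rate 1mbit burst 10k latency 70ms
 */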
struct tbf_sched_data {
        u32                     limit;
        u32                     max_size;
        s64                     buffer;
        s64                     mtu;
        struct psched_ratecfg   rate;
        struct psched_ratecfg   peak;

        s64                     tokens;
        s64                     ptokens;
        s64                     t_c;
        struct Qdisc            *qdisc;
        struct qdisc_watchdog   watchdog;
};
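/* Time to length: how many bytes can be sent in time_in_ns at rate r,
 * i.e. len = time_in_ns * r->rate_bytes_ps / NSEC_PER_SEC, adjusted for
 * ATM cell framing and the configured per-packet overhead.
 */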
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
                         u64 time_in_ns)
{
        u64 len = time_in_ns * r->rate_bytes_ps;

        do_div(len, NSEC_PER_SEC);

        if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
                do_div(len, 53);
                len = len * 48;
        }

        if (len > r->overhead)
                len -= r->overhead;
        else
                len = 0;

        return len;
}

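/* Hardware offload: mirror configuration changes, teardown and stats
 * requests to the device through ndo_setup_tc(TC_SETUP_QDISC_TBF) when
 * the driver supports it.
 */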
static void tbf_offload_change(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_tbf_qopt_offload qopt;

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return;

        qopt.command = TC_TBF_REPLACE;
        qopt.handle = sch->handle;
        qopt.parent = sch->parent;
        qopt.replace_params.rate = q->rate;
        qopt.replace_params.max_size = q->max_size;
        qopt.replace_params.qstats = &sch->qstats;

        dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static void tbf_offload_destroy(struct Qdisc *sch)
{
        struct net_device *dev = qdisc_dev(sch);
        struct tc_tbf_qopt_offload qopt;

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return;

        qopt.command = TC_TBF_DESTROY;
        qopt.handle = sch->handle;
        qopt.parent = sch->parent;
        dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}

static int tbf_offload_dump(struct Qdisc *sch)
{
        struct tc_tbf_qopt_offload qopt;

        qopt.command = TC_TBF_STATS;
        qopt.handle = sch->handle;
        qopt.parent = sch->parent;
        qopt.stats.bstats = &sch->bstats;
        qopt.stats.qstats = &sch->qstats;

        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}

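/* GSO packet is too big for the bucket: software-segment it and enqueue
 * the resulting pieces individually so that each one conforms to max_size.
 */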
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *segs, *nskb;
        netdev_features_t features = netif_skb_features(skb);
        unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
        int ret, nb;

        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

        if (IS_ERR_OR_NULL(segs))
                return qdisc_drop(skb, sch, to_free);

        nb = 0;
        skb_list_walk_safe(segs, segs, nskb) {
                skb_mark_not_on_list(segs);
                qdisc_skb_cb(segs)->pkt_len = segs->len;
                len += segs->len;
                ret = qdisc_enqueue(segs, q->qdisc, to_free);
                if (ret != NET_XMIT_SUCCESS) {
                        if (net_xmit_drop_count(ret))
                                qdisc_qstats_drop(sch);
                } else {
                        nb++;
                }
        }
        sch->q.qlen += nb;
        if (nb > 1)
                qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
        consume_skb(skb);
        return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}

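/* Enqueue into the inner qdisc.  Packets larger than max_size can never
 * earn enough tokens; GSO packets whose individual segments still fit are
 * segmented, anything else oversized is dropped.
 */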
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        unsigned int len = qdisc_pkt_len(skb);
        int ret;

        if (qdisc_pkt_len(skb) > q->max_size) {
                if (skb_is_gso(skb) &&
                    skb_gso_validate_mac_len(skb, q->max_size))
                        return tbf_segment(skb, sch, to_free);
                return qdisc_drop(skb, sch, to_free);
        }
        ret = qdisc_enqueue(skb, q->qdisc, to_free);
        if (ret != NET_XMIT_SUCCESS) {
                if (net_xmit_drop_count(ret))
                        qdisc_qstats_drop(sch);
                return ret;
        }

        sch->qstats.backlog += len;
        sch->q.qlen++;
        return NET_XMIT_SUCCESS;
}

static bool tbf_peak_present(const struct tbf_sched_data *q)
{
        return q->peak.rate_bytes_ps;
}

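/* Dequeue: refill the buckets with the time elapsed since t_c (capped at
 * their depths), charge the head packet's transmission time against both
 * the rate and peak buckets, and release it only if neither bucket goes
 * negative.  Otherwise arm the watchdog for the moment enough tokens will
 * have accumulated.
 */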
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        skb = q->qdisc->ops->peek(q->qdisc);

        if (skb) {
                s64 now;
                s64 toks;
                s64 ptoks = 0;
                unsigned int len = qdisc_pkt_len(skb);

                now = ktime_get_ns();
                toks = min_t(s64, now - q->t_c, q->buffer);

                if (tbf_peak_present(q)) {
                        ptoks = toks + q->ptokens;
                        if (ptoks > q->mtu)
                                ptoks = q->mtu;
                        ptoks -= (s64) psched_l2t_ns(&q->peak, len);
                }
                toks += q->tokens;
                if (toks > q->buffer)
                        toks = q->buffer;
                toks -= (s64) psched_l2t_ns(&q->rate, len);

                if ((toks|ptoks) >= 0) {
                        skb = qdisc_dequeue_peeked(q->qdisc);
                        if (unlikely(!skb))
                                return NULL;

                        q->t_c = now;
                        q->tokens = toks;
                        q->ptokens = ptoks;
                        qdisc_qstats_backlog_dec(sch, skb);
                        sch->q.qlen--;
                        qdisc_bstats_update(sch, skb);
                        return skb;
                }

                qdisc_watchdog_schedule_ns(&q->watchdog,
                                           now + max_t(long, -toks, -ptoks));
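                /* A shorter packet further back in the child queue might
                 * already fit in the available tokens, but sending it now
                 * would reorder the flow, so we deliberately wait for the
                 * head packet and just account an overlimit event.
                 */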
                qdisc_qstats_overlimit(sch);
        }
        return NULL;
}

static void tbf_reset(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        q->t_c = ktime_get_ns();
        q->tokens = q->buffer;
        q->ptokens = q->mtu;
        qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
        [TCA_TBF_PARMS]   = { .len = sizeof(struct tc_tbf_qopt) },
        [TCA_TBF_RTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_PTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_RATE64]  = { .type = NLA_U64 },
        [TCA_TBF_PRATE64] = { .type = NLA_U64 },
        [TCA_TBF_BURST]   = { .type = NLA_U32 },
        [TCA_TBF_PBURST]  = { .type = NLA_U32 },
};

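/* Parse the netlink configuration: convert burst/latency expressed in
 * psched ticks into nanoseconds, precompute the rate tables, derive
 * max_size, and create or resize the inner bfifo before committing the
 * new parameters under the tree lock.
 */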
static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        int err;
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_TBF_MAX + 1];
        struct tc_tbf_qopt *qopt;
        struct Qdisc *child = NULL;
        struct psched_ratecfg rate;
        struct psched_ratecfg peak;
        u64 max_size;
        s64 buffer, mtu;
        u64 rate64 = 0, prate64 = 0;

        err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
                                          NULL);
        if (err < 0)
                return err;

        err = -EINVAL;
        if (tb[TCA_TBF_PARMS] == NULL)
                goto done;

        qopt = nla_data(tb[TCA_TBF_PARMS]);
        if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
                                              tb[TCA_TBF_RTAB],
                                              NULL));

        if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
                                              tb[TCA_TBF_PTAB],
                                              NULL));

        buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
        mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);

        if (tb[TCA_TBF_RATE64])
                rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
        psched_ratecfg_precompute(&rate, &qopt->rate, rate64);

        if (tb[TCA_TBF_BURST]) {
                max_size = nla_get_u32(tb[TCA_TBF_BURST]);
                buffer = psched_l2t_ns(&rate, max_size);
        } else {
                max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
        }

        if (qopt->peakrate.rate) {
                if (tb[TCA_TBF_PRATE64])
                        prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
                psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
                if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
                        pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
                                            peak.rate_bytes_ps, rate.rate_bytes_ps);
                        err = -EINVAL;
                        goto done;
                }

                if (tb[TCA_TBF_PBURST]) {
                        u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
                        max_size = min_t(u32, max_size, pburst);
                        mtu = psched_l2t_ns(&peak, pburst);
                } else {
                        max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
                }
        } else {
                memset(&peak, 0, sizeof(peak));
        }

        if (max_size < psched_mtu(qdisc_dev(sch)))
                pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
                                    max_size, qdisc_dev(sch)->name,
                                    psched_mtu(qdisc_dev(sch)));

        if (!max_size) {
                err = -EINVAL;
                goto done;
        }

        if (q->qdisc != &noop_qdisc) {
                err = fifo_set_limit(q->qdisc, qopt->limit);
                if (err)
                        goto done;
        } else if (qopt->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
                                         extack);
                if (IS_ERR(child)) {
                        err = PTR_ERR(child);
                        goto done;
                }

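                /* Register the new child in the qdisc hash so it can be
                 * looked up by its handle later.
                 */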
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);
        if (child) {
                qdisc_tree_flush_backlog(q->qdisc);
                qdisc_put(q->qdisc);
                q->qdisc = child;
        }
        q->limit = qopt->limit;
        if (tb[TCA_TBF_PBURST])
                q->mtu = mtu;
        else
                q->mtu = PSCHED_TICKS2NS(qopt->mtu);
        q->max_size = max_size;
        if (tb[TCA_TBF_BURST])
                q->buffer = buffer;
        else
                q->buffer = PSCHED_TICKS2NS(qopt->buffer);
        q->tokens = q->buffer;
        q->ptokens = q->mtu;

        memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
        memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));

        sch_tree_unlock(sch);
        err = 0;

        tbf_offload_change(sch);
done:
        return err;
}

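/* Start with the no-op child; tbf_change() attaches a bfifo sized to the
 * configured limit and fills in the token bucket parameters.
 */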
static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_init(&q->watchdog, sch);
        q->qdisc = &noop_qdisc;

        if (!opt)
                return -EINVAL;

        q->t_c = ktime_get_ns();

        return tbf_change(sch, opt, extack);
}

static void tbf_destroy(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_cancel(&q->watchdog);
        tbf_offload_destroy(sch);
        qdisc_put(q->qdisc);
}

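/* Report the current configuration to userspace.  Rates that do not fit
 * in the legacy 32-bit fields are additionally exported as 64-bit
 * attributes (TCA_TBF_RATE64 / TCA_TBF_PRATE64).
 */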
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_tbf_qopt opt;
        int err;

        err = tbf_offload_dump(sch);
        if (err)
                return err;

        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        opt.limit = q->limit;
        psched_ratecfg_getrate(&opt.rate, &q->rate);
        if (tbf_peak_present(q))
                psched_ratecfg_getrate(&opt.peakrate, &q->peak);
        else
                memset(&opt.peakrate, 0, sizeof(opt.peakrate));
        opt.mtu = PSCHED_NS2TICKS(q->mtu);
        opt.buffer = PSCHED_NS2TICKS(q->buffer);
        if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
        if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
                              TCA_TBF_PAD))
                goto nla_put_failure;
        if (tbf_peak_present(q) &&
            q->peak.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
                              TCA_TBF_PAD))
                goto nla_put_failure;

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

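/* TBF exposes a single class (minor 1) that wraps the inner qdisc; the
 * class operations below graft, dump and walk that one class.
 */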
static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;

        return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);
        return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        return q->qdisc;
}

static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops tbf_class_ops = {
        .graft     = tbf_graft,
        .leaf      = tbf_leaf,
        .find      = tbf_find,
        .walk      = tbf_walk,
        .dump      = tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
        .next      = NULL,
        .cl_ops    = &tbf_class_ops,
        .id        = "tbf",
        .priv_size = sizeof(struct tbf_sched_data),
        .enqueue   = tbf_enqueue,
        .dequeue   = tbf_dequeue,
        .peek      = qdisc_peek_dequeued,
        .init      = tbf_init,
        .reset     = tbf_reset,
        .destroy   = tbf_destroy,
        .change    = tbf_change,
        .dump      = tbf_dump,
        .owner     = THIS_MODULE,
};

static int __init tbf_module_init(void)
{
        return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
        unregister_qdisc(&tbf_qdisc_ops);
}

module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");