1
2
3
4
5
6
7
8
9
10#include <linux/module.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/errno.h>
15#include <linux/skbuff.h>
16#include <net/netlink.h>
17#include <net/sch_generic.h>
18#include <net/pkt_cls.h>
19#include <net/pkt_sched.h>
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* Per-qdisc private state for the Token Bucket Filter. */
struct tbf_sched_data {
/* Parameters */
	u32 limit;		/* Maximal length of backlog: bytes */
	u32 max_size;		/* Largest packet (bytes) that can ever be sent in one piece */
	s64 buffer;		/* Token bucket depth, in ns of transmit time at `rate` */
	s64 mtu;		/* Peak-rate bucket depth, in ns at `peak` */
	struct psched_ratecfg rate;	/* Configured (average) rate */
	struct psched_ratecfg peak;	/* Optional peak rate; rate_bytes_ps == 0 if unset */

/* Variables */
	s64 tokens;		/* Current number of rate tokens (ns) */
	s64 ptokens;		/* Current number of peak tokens (ns) */
	s64 t_c;		/* Time check-point of last token refill */
	struct Qdisc *qdisc;	/* Inner qdisc, default bfifo */
	struct qdisc_watchdog watchdog;	/* Timer to resume dequeue when throttled */
};
113
114
115
116
117
/* Time-to-length: convert a duration in ns to the number of bytes
 * that can be transmitted at rate @r in that time.
 */
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
			 u64 time_in_ns)
{
	/*
	 * The formula is:
	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
	 * (do_div divides in place, so order of operations matters)
	 */
	u64 len = time_in_ns * r->rate_bytes_ps;

	do_div(len, NSEC_PER_SEC);

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
		/* ATM: each 53-byte cell carries only 48 payload bytes */
		do_div(len, 53);
		len = len * 48;
	}

	/* Subtract per-packet overhead; clamp at zero rather than wrap */
	if (len > r->overhead)
		len -= r->overhead;
	else
		len = 0;

	return len;
}
140
141static void tbf_offload_change(struct Qdisc *sch)
142{
143 struct tbf_sched_data *q = qdisc_priv(sch);
144 struct net_device *dev = qdisc_dev(sch);
145 struct tc_tbf_qopt_offload qopt;
146
147 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
148 return;
149
150 qopt.command = TC_TBF_REPLACE;
151 qopt.handle = sch->handle;
152 qopt.parent = sch->parent;
153 qopt.replace_params.rate = q->rate;
154 qopt.replace_params.max_size = q->max_size;
155 qopt.replace_params.qstats = &sch->qstats;
156
157 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
158}
159
160static void tbf_offload_destroy(struct Qdisc *sch)
161{
162 struct net_device *dev = qdisc_dev(sch);
163 struct tc_tbf_qopt_offload qopt;
164
165 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
166 return;
167
168 qopt.command = TC_TBF_DESTROY;
169 qopt.handle = sch->handle;
170 qopt.parent = sch->parent;
171 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
172}
173
174static int tbf_offload_dump(struct Qdisc *sch)
175{
176 struct tc_tbf_qopt_offload qopt;
177
178 qopt.command = TC_TBF_STATS;
179 qopt.handle = sch->handle;
180 qopt.parent = sch->parent;
181 qopt.stats.bstats = &sch->bstats;
182 qopt.stats.qstats = &sch->qstats;
183
184 return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
185}
186
187static void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new,
188 struct Qdisc *old, struct netlink_ext_ack *extack)
189{
190 struct tc_tbf_qopt_offload graft_offload = {
191 .handle = sch->handle,
192 .parent = sch->parent,
193 .child_handle = new->handle,
194 .command = TC_TBF_GRAFT,
195 };
196
197 qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
198 TC_SETUP_QDISC_TBF, &graft_offload, extack);
199}
200
201
202
203
/* GSO packet is too big for max_size; software-segment it so that
 * each resulting segment can be transmitted within the token budget.
 */
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *segs, *nskb;
	netdev_features_t features = netif_skb_features(skb);
	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
	int ret, nb;

	/* Force software segmentation by masking out GSO features */
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs))
		return qdisc_drop(skb, sch, to_free);

	nb = 0;	/* number of segments successfully enqueued */
	skb_list_walk_safe(segs, segs, nskb) {
		skb_mark_not_on_list(segs);
		qdisc_skb_cb(segs)->pkt_len = segs->len;
		len += segs->len;
		ret = qdisc_enqueue(segs, q->qdisc, to_free);
		if (ret != NET_XMIT_SUCCESS) {
			if (net_xmit_drop_count(ret))
				qdisc_qstats_drop(sch);
		} else {
			nb++;
		}
	}
	sch->q.qlen += nb;
	/* The ancestors were charged for one skb of prev_len bytes;
	 * correct their qlen/backlog for the nb segments (len bytes
	 * total) actually enqueued.
	 */
	if (nb > 1)
		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	consume_skb(skb);
	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
237
238static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
239 struct sk_buff **to_free)
240{
241 struct tbf_sched_data *q = qdisc_priv(sch);
242 unsigned int len = qdisc_pkt_len(skb);
243 int ret;
244
245 if (qdisc_pkt_len(skb) > q->max_size) {
246 if (skb_is_gso(skb) &&
247 skb_gso_validate_mac_len(skb, q->max_size))
248 return tbf_segment(skb, sch, to_free);
249 return qdisc_drop(skb, sch, to_free);
250 }
251 ret = qdisc_enqueue(skb, q->qdisc, to_free);
252 if (ret != NET_XMIT_SUCCESS) {
253 if (net_xmit_drop_count(ret))
254 qdisc_qstats_drop(sch);
255 return ret;
256 }
257
258 sch->qstats.backlog += len;
259 sch->q.qlen++;
260 return NET_XMIT_SUCCESS;
261}
262
263static bool tbf_peak_present(const struct tbf_sched_data *q)
264{
265 return q->peak.rate_bytes_ps;
266}
267
/* Dequeue the head packet if enough tokens have accumulated;
 * otherwise arm the watchdog for the earliest time it will fit.
 */
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		s64 now;
		s64 toks;
		s64 ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = ktime_get_ns();
		/* Tokens earned since the last check-point, capped at the
		 * bucket depth (all token values are in nanoseconds).
		 */
		toks = min_t(s64, now - q->t_c, q->buffer);

		if (tbf_peak_present(q)) {
			ptoks = toks + q->ptokens;
			if (ptoks > q->mtu)
				ptoks = q->mtu;
			/* Charge this packet against the peak bucket */
			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
		}
		toks += q->tokens;
		if (toks > q->buffer)
			toks = q->buffer;
		/* Charge this packet against the rate bucket */
		toks -= (s64) psched_l2t_ns(&q->rate, len);

		/* Transmit only if BOTH buckets stay non-negative */
		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		/* Wake up when the more depleted bucket reaches zero */
		qdisc_watchdog_schedule_ns(&q->watchdog,
					   now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but, however, this is wrong in principle.
		   We MUST NOT reorder packets inside this qdisc:
		   head-of-line blocking is the contract here, and
		   searching the queue for a fitting packet would
		   change the service order callers depend on.
		 */
		qdisc_qstats_overlimit(sch);
	}
	return NULL;
}
327
328static void tbf_reset(struct Qdisc *sch)
329{
330 struct tbf_sched_data *q = qdisc_priv(sch);
331
332 qdisc_reset(q->qdisc);
333 sch->qstats.backlog = 0;
334 sch->q.qlen = 0;
335 q->t_c = ktime_get_ns();
336 q->tokens = q->buffer;
337 q->ptokens = q->mtu;
338 qdisc_watchdog_cancel(&q->watchdog);
339}
340
/* Netlink attribute validation policy for TCA_TBF_* options. */
static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_RATE64] = { .type = NLA_U64 },
	[TCA_TBF_PRATE64] = { .type = NLA_U64 },
	[TCA_TBF_BURST] = { .type = NLA_U32 },
	[TCA_TBF_PBURST] = { .type = NLA_U32 },
};
350
351static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
352 struct netlink_ext_ack *extack)
353{
354 int err;
355 struct tbf_sched_data *q = qdisc_priv(sch);
356 struct nlattr *tb[TCA_TBF_MAX + 1];
357 struct tc_tbf_qopt *qopt;
358 struct Qdisc *child = NULL;
359 struct psched_ratecfg rate;
360 struct psched_ratecfg peak;
361 u64 max_size;
362 s64 buffer, mtu;
363 u64 rate64 = 0, prate64 = 0;
364
365 err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
366 NULL);
367 if (err < 0)
368 return err;
369
370 err = -EINVAL;
371 if (tb[TCA_TBF_PARMS] == NULL)
372 goto done;
373
374 qopt = nla_data(tb[TCA_TBF_PARMS]);
375 if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
376 qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
377 tb[TCA_TBF_RTAB],
378 NULL));
379
380 if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
381 qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
382 tb[TCA_TBF_PTAB],
383 NULL));
384
385 buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
386 mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
387
388 if (tb[TCA_TBF_RATE64])
389 rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
390 psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
391
392 if (tb[TCA_TBF_BURST]) {
393 max_size = nla_get_u32(tb[TCA_TBF_BURST]);
394 buffer = psched_l2t_ns(&rate, max_size);
395 } else {
396 max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
397 }
398
399 if (qopt->peakrate.rate) {
400 if (tb[TCA_TBF_PRATE64])
401 prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
402 psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
403 if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
404 pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
405 peak.rate_bytes_ps, rate.rate_bytes_ps);
406 err = -EINVAL;
407 goto done;
408 }
409
410 if (tb[TCA_TBF_PBURST]) {
411 u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
412 max_size = min_t(u32, max_size, pburst);
413 mtu = psched_l2t_ns(&peak, pburst);
414 } else {
415 max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
416 }
417 } else {
418 memset(&peak, 0, sizeof(peak));
419 }
420
421 if (max_size < psched_mtu(qdisc_dev(sch)))
422 pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
423 max_size, qdisc_dev(sch)->name,
424 psched_mtu(qdisc_dev(sch)));
425
426 if (!max_size) {
427 err = -EINVAL;
428 goto done;
429 }
430
431 if (q->qdisc != &noop_qdisc) {
432 err = fifo_set_limit(q->qdisc, qopt->limit);
433 if (err)
434 goto done;
435 } else if (qopt->limit > 0) {
436 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
437 extack);
438 if (IS_ERR(child)) {
439 err = PTR_ERR(child);
440 goto done;
441 }
442
443
444 qdisc_hash_add(child, true);
445 }
446
447 sch_tree_lock(sch);
448 if (child) {
449 qdisc_tree_flush_backlog(q->qdisc);
450 qdisc_put(q->qdisc);
451 q->qdisc = child;
452 }
453 q->limit = qopt->limit;
454 if (tb[TCA_TBF_PBURST])
455 q->mtu = mtu;
456 else
457 q->mtu = PSCHED_TICKS2NS(qopt->mtu);
458 q->max_size = max_size;
459 if (tb[TCA_TBF_BURST])
460 q->buffer = buffer;
461 else
462 q->buffer = PSCHED_TICKS2NS(qopt->buffer);
463 q->tokens = q->buffer;
464 q->ptokens = q->mtu;
465
466 memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
467 memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
468
469 sch_tree_unlock(sch);
470 err = 0;
471
472 tbf_offload_change(sch);
473done:
474 return err;
475}
476
/* Qdisc init: set up safe defaults, then apply the user's options. */
static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	/* Initialize the watchdog and child pointer BEFORE validating
	 * opt: tbf_destroy() (see below in this file) unconditionally
	 * cancels the watchdog and puts q->qdisc, so they must be valid
	 * on every exit path. NOTE(review): this assumes the qdisc core
	 * calls ->destroy after a failed ->init — verify against core.
	 */
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	if (!opt)
		return -EINVAL;

	/* Start the token accounting clock now */
	q->t_c = ktime_get_ns();

	return tbf_change(sch, opt, extack);
}
492
/* Qdisc teardown: stop the timer, undo any hardware offload, then
 * release the child qdisc.
 */
static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	tbf_offload_destroy(sch);
	qdisc_put(q->qdisc);
}
501
502static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
503{
504 struct tbf_sched_data *q = qdisc_priv(sch);
505 struct nlattr *nest;
506 struct tc_tbf_qopt opt;
507 int err;
508
509 err = tbf_offload_dump(sch);
510 if (err)
511 return err;
512
513 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
514 if (nest == NULL)
515 goto nla_put_failure;
516
517 opt.limit = q->limit;
518 psched_ratecfg_getrate(&opt.rate, &q->rate);
519 if (tbf_peak_present(q))
520 psched_ratecfg_getrate(&opt.peakrate, &q->peak);
521 else
522 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
523 opt.mtu = PSCHED_NS2TICKS(q->mtu);
524 opt.buffer = PSCHED_NS2TICKS(q->buffer);
525 if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
526 goto nla_put_failure;
527 if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
528 nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
529 TCA_TBF_PAD))
530 goto nla_put_failure;
531 if (tbf_peak_present(q) &&
532 q->peak.rate_bytes_ps >= (1ULL << 32) &&
533 nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
534 TCA_TBF_PAD))
535 goto nla_put_failure;
536
537 return nla_nest_end(skb, nest);
538
539nla_put_failure:
540 nla_nest_cancel(skb, nest);
541 return -1;
542}
543
544static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
545 struct sk_buff *skb, struct tcmsg *tcm)
546{
547 struct tbf_sched_data *q = qdisc_priv(sch);
548
549 tcm->tcm_handle |= TC_H_MIN(1);
550 tcm->tcm_info = q->qdisc->handle;
551
552 return 0;
553}
554
555static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
556 struct Qdisc **old, struct netlink_ext_ack *extack)
557{
558 struct tbf_sched_data *q = qdisc_priv(sch);
559
560 if (new == NULL)
561 new = &noop_qdisc;
562
563 *old = qdisc_replace(sch, new, &q->qdisc);
564
565 tbf_offload_graft(sch, new, *old, extack);
566 return 0;
567}
568
569static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
570{
571 struct tbf_sched_data *q = qdisc_priv(sch);
572 return q->qdisc;
573}
574
575static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
576{
577 return 1;
578}
579
580static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
581{
582 if (!walker->stop) {
583 if (walker->count >= walker->skip)
584 if (walker->fn(sch, 1, walker) < 0) {
585 walker->stop = 1;
586 return;
587 }
588 walker->count++;
589 }
590}
591
/* Class operations: TBF is classful with exactly one class (the
 * child qdisc), so these are all trivial single-class handlers.
 */
static const struct Qdisc_class_ops tbf_class_ops = {
	.graft = tbf_graft,
	.leaf = tbf_leaf,
	.find = tbf_find,
	.walk = tbf_walk,
	.dump = tbf_dump_class,
};
599
/* Qdisc operations table registered under the "tbf" id. */
static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next = NULL,
	.cl_ops = &tbf_class_ops,
	.id = "tbf",
	.priv_size = sizeof(struct tbf_sched_data),
	.enqueue = tbf_enqueue,
	.dequeue = tbf_dequeue,
	.peek = qdisc_peek_dequeued,
	.init = tbf_init,
	.reset = tbf_reset,
	.destroy = tbf_destroy,
	.change = tbf_change,
	.dump = tbf_dump,
	.owner = THIS_MODULE,
};
615
/* Register the TBF qdisc with the traffic-control core on load. */
static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}
620
/* Unregister the TBF qdisc on module unload. */
static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");
628