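/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 * Shapes traffic to a configured rate with a token bucket: packets wait in
 * an inner child qdisc (a bfifo by default) until enough tokens have
 * accumulated to let them pass.
 */
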
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
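
/* Simple Token Bucket Filter
 * ==========================
 *
 * Theory of operation (as implemented below): tokens are credited at the
 * configured rate, up to a maximum of "buffer" worth of transmit time; both
 * tokens and bucket depths are kept in nanoseconds.  A packet of L bytes may
 * only be dequeued when at least L bytes worth of tokens are available, and
 * dequeuing it consumes that many.  If a peak rate is configured, a second,
 * shallower bucket of depth "mtu" additionally limits how fast an accumulated
 * burst may be drained.  While tokens are exhausted, packets stay queued in
 * the inner child qdisc and a watchdog timer re-arms the queue once enough
 * tokens will have been credited.
 *
 * Illustrative userspace setup (device name and numbers are examples only):
 *
 *	tc qdisc add dev eth0 root tbf rate 1mbit burst 32kbit latency 400ms
 */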
struct tbf_sched_data {
/* Parameters */
	u32			limit;		/* Maximal length of backlog: bytes */
	u32			max_size;	/* Largest packet the bucket can ever pass: bytes */
	s64			buffer;		/* Token bucket depth (burst), in ns of transmit time */
	s64			mtu;		/* Peak-rate bucket depth (minburst), in ns */
	struct psched_ratecfg	rate;
	struct psched_ratecfg	peak;

/* Variables */
	s64			tokens;		/* Current number of rate tokens, in ns */
	s64			ptokens;	/* Current number of peak-rate tokens, in ns */
	s64			t_c;		/* Time check-point of the last token update */
	struct Qdisc		*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog	watchdog;	/* Watchdog timer */
};
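
/* Time to Length: convert a duration in nanoseconds into the number of
 * bytes that can be transmitted in that time at the given rate.
 */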
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
			 u64 time_in_ns)
{
	/* The formula is:
	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
	 */
	u64 len = time_in_ns * r->rate_bytes_ps;

	do_div(len, NSEC_PER_SEC);

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
		/* 53-byte ATM cells carry only 48 bytes of payload */
		do_div(len, 53);
		len = len * 48;
	}

	if (len > r->overhead)
		len -= r->overhead;
	else
		len = 0;

	return len;
}
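
/* Return the length of the individual segments of a GSO packet, including
 * all headers (MAC, IP, TCP/UDP).
 */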
static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
{
	unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);

	return hdr_len + skb_gso_transport_seglen(skb);
}
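
/* The GSO packet is too big for the bucket as a whole; segment it so that
 * each resulting segment can be transmitted in time.
 */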
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *segs, *nskb;
	netdev_features_t features = netif_skb_features(skb);
	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
	int ret, nb;

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs))
		return qdisc_drop(skb, sch, to_free);

	nb = 0;
	while (segs) {
		nskb = segs->next;
		segs->next = NULL;
		qdisc_skb_cb(segs)->pkt_len = segs->len;
		len += segs->len;
		ret = qdisc_enqueue(segs, q->qdisc, to_free);
		if (ret != NET_XMIT_SUCCESS) {
			if (net_xmit_drop_count(ret))
				qdisc_qstats_drop(sch);
		} else {
			nb++;
		}
		segs = nskb;
	}
	sch->q.qlen += nb;
	if (nb > 1)
		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	consume_skb(skb);
	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
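
/* Enqueue into the child qdisc.  Packets larger than max_size are dropped,
 * unless they are GSO packets whose individual segments fit, in which case
 * they are segmented first.
 */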
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if (qdisc_pkt_len(skb) > q->max_size) {
		if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
			return tbf_segment(skb, sch, to_free);
		return qdisc_drop(skb, sch, to_free);
	}
	ret = qdisc_enqueue(skb, q->qdisc, to_free);
	if (ret != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret))
			qdisc_qstats_drop(sch);
		return ret;
	}

	qdisc_qstats_backlog_inc(sch, skb);
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static bool tbf_peak_present(const struct tbf_sched_data *q)
{
	return q->peak.rate_bytes_ps;
}
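
/* Dequeue a packet only if enough tokens have accumulated for its length at
 * both the configured rate and (if set) the peak rate; otherwise record an
 * overlimit event and arm the watchdog for the time the tokens will need.
 */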
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		s64 now;
		s64 toks;
		s64 ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = ktime_get_ns();
		toks = min_t(s64, now - q->t_c, q->buffer);

		if (tbf_peak_present(q)) {
			ptoks = toks + q->ptokens;
			if (ptoks > q->mtu)
				ptoks = q->mtu;
			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
		}
		toks += q->tokens;
		if (toks > q->buffer)
			toks = q->buffer;
		toks -= (s64) psched_l2t_ns(&q->rate, len);

		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   now + max_t(long, -toks, -ptoks));
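
		/* A shorter packet further back in the queue might fit the
		 * remaining tokens, but dequeuing it here would reorder the
		 * flow, which we must not do.  Record the overlimit and let
		 * the watchdog retry later.
		 */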
		qdisc_qstats_overlimit(sch);
	}
	return NULL;
}

static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	q->t_c = ktime_get_ns();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]   = { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_RATE64]  = { .type = NLA_U64 },
	[TCA_TBF_PRATE64] = { .type = NLA_U64 },
	[TCA_TBF_BURST]   = { .type = NLA_U32 },
	[TCA_TBF_PBURST]  = { .type = NLA_U32 },
};
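
/* Parse and validate the TCA_TBF_* netlink attributes, precompute the rate
 * configuration and bucket depths (in nanoseconds), and swap in a new bfifo
 * child qdisc if the limit changed.  Called both at init time and on
 * "tc qdisc change".
 */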
static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_MAX + 1];
	struct tc_tbf_qopt *qopt;
	struct Qdisc *child = NULL;
	struct psched_ratecfg rate;
	struct psched_ratecfg peak;
	u64 max_size;
	s64 buffer, mtu;
	u64 rate64 = 0, prate64 = 0;

	err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	/* A legacy rate table from an old tc binary is only used to derive
	 * the link layer type; the table reference is dropped right away.
	 */
	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
					      tb[TCA_TBF_RTAB]));

	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
					      tb[TCA_TBF_PTAB]));

	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);

	if (tb[TCA_TBF_RATE64])
		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);

	if (tb[TCA_TBF_BURST]) {
		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
		buffer = psched_l2t_ns(&rate, max_size);
	} else {
		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
	}

	if (qopt->peakrate.rate) {
		if (tb[TCA_TBF_PRATE64])
			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equal to rate %llu!\n",
					    peak.rate_bytes_ps, rate.rate_bytes_ps);
			err = -EINVAL;
			goto done;
		}

		if (tb[TCA_TBF_PBURST]) {
			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);

			max_size = min_t(u32, max_size, pburst);
			mtu = psched_l2t_ns(&peak, pburst);
		} else {
			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
		}
	} else {
		memset(&peak, 0, sizeof(peak));
	}

	if (max_size < psched_mtu(qdisc_dev(sch)))
		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u)!\n",
				    max_size, qdisc_dev(sch)->name,
				    psched_mtu(qdisc_dev(sch)));

	if (!max_size) {
		err = -EINVAL;
		goto done;
	}

	if (q->qdisc != &noop_qdisc) {
		err = fifo_set_limit(q->qdisc, qopt->limit);
		if (err)
			goto done;
	} else if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
		if (child != &noop_qdisc)
			qdisc_hash_add(child, true);
	}
	q->limit = qopt->limit;
	if (tb[TCA_TBF_PBURST])
		q->mtu = mtu;
	else
		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
	q->max_size = max_size;
	if (tb[TCA_TBF_BURST])
		q->buffer = buffer;
	else
		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));

	sch_tree_unlock(sch);
	err = 0;
done:
	return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	if (opt == NULL)
		return -EINVAL;

	q->t_c = ktime_get_ns();

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
}
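
/* Dump the current configuration back to userspace: the classic tc_tbf_qopt
 * plus 64-bit rate attributes whenever a rate exceeds what a u32 can hold.
 */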
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	psched_ratecfg_getrate(&opt.rate, &q->rate);
	if (tbf_peak_present(q))
		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = PSCHED_NS2TICKS(q->mtu);
	opt.buffer = PSCHED_NS2TICKS(q->buffer);
	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;
	if (tbf_peak_present(q) &&
	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
			      TCA_TBF_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
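
/* TBF is classful only in the trivial sense: it exposes a single class
 * (minor 1) whose leaf is the inner child qdisc.
 */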
static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops tbf_class_ops = {
	.graft		= tbf_graft,
	.leaf		= tbf_leaf,
	.find		= tbf_find,
	.walk		= tbf_walk,
	.dump		= tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &tbf_class_ops,
	.id		= "tbf",
	.priv_size	= sizeof(struct tbf_sched_data),
	.enqueue	= tbf_enqueue,
	.dequeue	= tbf_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= tbf_init,
	.reset		= tbf_reset,
	.destroy	= tbf_destroy,
	.change		= tbf_change,
	.dump		= tbf_dump,
	.owner		= THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}

module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");