1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/string.h>
19#include <linux/errno.h>
20#include <linux/skbuff.h>
21#include <net/netlink.h>
22#include <net/sch_generic.h>
23#include <net/pkt_sched.h>
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101struct tbf_sched_data {
102
103 u32 limit;
104 u32 max_size;
105 s64 buffer;
106 s64 mtu;
107 struct psched_ratecfg rate;
108 struct psched_ratecfg peak;
109
110
111 s64 tokens;
112 s64 ptokens;
113 s64 t_c;
114 struct Qdisc *qdisc;
115 struct qdisc_watchdog watchdog;
116};
117
118
119
120
121
122static u64 psched_ns_t2l(const struct psched_ratecfg *r,
123 u64 time_in_ns)
124{
125
126
127
128 u64 len = time_in_ns * r->rate_bytes_ps;
129
130 do_div(len, NSEC_PER_SEC);
131
132 if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
133 do_div(len, 53);
134 len = len * 48;
135 }
136
137 if (len > r->overhead)
138 len -= r->overhead;
139 else
140 len = 0;
141
142 return len;
143}
144
145
146
147
148
149static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
150{
151 unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
152 return hdr_len + skb_gso_transport_seglen(skb);
153}
154
155
156
157
158static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
159 struct sk_buff **to_free)
160{
161 struct tbf_sched_data *q = qdisc_priv(sch);
162 struct sk_buff *segs, *nskb;
163 netdev_features_t features = netif_skb_features(skb);
164 unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
165 int ret, nb;
166
167 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
168
169 if (IS_ERR_OR_NULL(segs))
170 return qdisc_drop(skb, sch, to_free);
171
172 nb = 0;
173 while (segs) {
174 nskb = segs->next;
175 segs->next = NULL;
176 qdisc_skb_cb(segs)->pkt_len = segs->len;
177 len += segs->len;
178 ret = qdisc_enqueue(segs, q->qdisc, to_free);
179 if (ret != NET_XMIT_SUCCESS) {
180 if (net_xmit_drop_count(ret))
181 qdisc_qstats_drop(sch);
182 } else {
183 nb++;
184 }
185 segs = nskb;
186 }
187 sch->q.qlen += nb;
188 if (nb > 1)
189 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
190 consume_skb(skb);
191 return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
192}
193
194static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
195 struct sk_buff **to_free)
196{
197 struct tbf_sched_data *q = qdisc_priv(sch);
198 int ret;
199
200 if (qdisc_pkt_len(skb) > q->max_size) {
201 if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size)
202 return tbf_segment(skb, sch, to_free);
203 return qdisc_drop(skb, sch, to_free);
204 }
205 ret = qdisc_enqueue(skb, q->qdisc, to_free);
206 if (ret != NET_XMIT_SUCCESS) {
207 if (net_xmit_drop_count(ret))
208 qdisc_qstats_drop(sch);
209 return ret;
210 }
211
212 qdisc_qstats_backlog_inc(sch, skb);
213 sch->q.qlen++;
214 return NET_XMIT_SUCCESS;
215}
216
217static bool tbf_peak_present(const struct tbf_sched_data *q)
218{
219 return q->peak.rate_bytes_ps;
220}
221
222static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
223{
224 struct tbf_sched_data *q = qdisc_priv(sch);
225 struct sk_buff *skb;
226
227 skb = q->qdisc->ops->peek(q->qdisc);
228
229 if (skb) {
230 s64 now;
231 s64 toks;
232 s64 ptoks = 0;
233 unsigned int len = qdisc_pkt_len(skb);
234
235 now = ktime_get_ns();
236 toks = min_t(s64, now - q->t_c, q->buffer);
237
238 if (tbf_peak_present(q)) {
239 ptoks = toks + q->ptokens;
240 if (ptoks > q->mtu)
241 ptoks = q->mtu;
242 ptoks -= (s64) psched_l2t_ns(&q->peak, len);
243 }
244 toks += q->tokens;
245 if (toks > q->buffer)
246 toks = q->buffer;
247 toks -= (s64) psched_l2t_ns(&q->rate, len);
248
249 if ((toks|ptoks) >= 0) {
250 skb = qdisc_dequeue_peeked(q->qdisc);
251 if (unlikely(!skb))
252 return NULL;
253
254 q->t_c = now;
255 q->tokens = toks;
256 q->ptokens = ptoks;
257 qdisc_qstats_backlog_dec(sch, skb);
258 sch->q.qlen--;
259 qdisc_bstats_update(sch, skb);
260 return skb;
261 }
262
263 qdisc_watchdog_schedule_ns(&q->watchdog,
264 now + max_t(long, -toks, -ptoks));
265
266
267
268
269
270
271
272
273
274
275
276
277 qdisc_qstats_overlimit(sch);
278 }
279 return NULL;
280}
281
282static void tbf_reset(struct Qdisc *sch)
283{
284 struct tbf_sched_data *q = qdisc_priv(sch);
285
286 qdisc_reset(q->qdisc);
287 sch->qstats.backlog = 0;
288 sch->q.qlen = 0;
289 q->t_c = ktime_get_ns();
290 q->tokens = q->buffer;
291 q->ptokens = q->mtu;
292 qdisc_watchdog_cancel(&q->watchdog);
293}
294
295static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
296 [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
297 [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
298 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
299 [TCA_TBF_RATE64] = { .type = NLA_U64 },
300 [TCA_TBF_PRATE64] = { .type = NLA_U64 },
301 [TCA_TBF_BURST] = { .type = NLA_U32 },
302 [TCA_TBF_PBURST] = { .type = NLA_U32 },
303};
304
305static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
306{
307 int err;
308 struct tbf_sched_data *q = qdisc_priv(sch);
309 struct nlattr *tb[TCA_TBF_MAX + 1];
310 struct tc_tbf_qopt *qopt;
311 struct Qdisc *child = NULL;
312 struct psched_ratecfg rate;
313 struct psched_ratecfg peak;
314 u64 max_size;
315 s64 buffer, mtu;
316 u64 rate64 = 0, prate64 = 0;
317
318 err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
319 if (err < 0)
320 return err;
321
322 err = -EINVAL;
323 if (tb[TCA_TBF_PARMS] == NULL)
324 goto done;
325
326 qopt = nla_data(tb[TCA_TBF_PARMS]);
327 if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
328 qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
329 tb[TCA_TBF_RTAB]));
330
331 if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
332 qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
333 tb[TCA_TBF_PTAB]));
334
335 buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
336 mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
337
338 if (tb[TCA_TBF_RATE64])
339 rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
340 psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
341
342 if (tb[TCA_TBF_BURST]) {
343 max_size = nla_get_u32(tb[TCA_TBF_BURST]);
344 buffer = psched_l2t_ns(&rate, max_size);
345 } else {
346 max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
347 }
348
349 if (qopt->peakrate.rate) {
350 if (tb[TCA_TBF_PRATE64])
351 prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
352 psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
353 if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
354 pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
355 peak.rate_bytes_ps, rate.rate_bytes_ps);
356 err = -EINVAL;
357 goto done;
358 }
359
360 if (tb[TCA_TBF_PBURST]) {
361 u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
362 max_size = min_t(u32, max_size, pburst);
363 mtu = psched_l2t_ns(&peak, pburst);
364 } else {
365 max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
366 }
367 } else {
368 memset(&peak, 0, sizeof(peak));
369 }
370
371 if (max_size < psched_mtu(qdisc_dev(sch)))
372 pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
373 max_size, qdisc_dev(sch)->name,
374 psched_mtu(qdisc_dev(sch)));
375
376 if (!max_size) {
377 err = -EINVAL;
378 goto done;
379 }
380
381 if (q->qdisc != &noop_qdisc) {
382 err = fifo_set_limit(q->qdisc, qopt->limit);
383 if (err)
384 goto done;
385 } else if (qopt->limit > 0) {
386 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
387 if (IS_ERR(child)) {
388 err = PTR_ERR(child);
389 goto done;
390 }
391 }
392
393 sch_tree_lock(sch);
394 if (child) {
395 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
396 q->qdisc->qstats.backlog);
397 qdisc_destroy(q->qdisc);
398 q->qdisc = child;
399 }
400 q->limit = qopt->limit;
401 if (tb[TCA_TBF_PBURST])
402 q->mtu = mtu;
403 else
404 q->mtu = PSCHED_TICKS2NS(qopt->mtu);
405 q->max_size = max_size;
406 if (tb[TCA_TBF_BURST])
407 q->buffer = buffer;
408 else
409 q->buffer = PSCHED_TICKS2NS(qopt->buffer);
410 q->tokens = q->buffer;
411 q->ptokens = q->mtu;
412
413 memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
414 memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
415
416 sch_tree_unlock(sch);
417 err = 0;
418done:
419 return err;
420}
421
422static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
423{
424 struct tbf_sched_data *q = qdisc_priv(sch);
425
426 if (opt == NULL)
427 return -EINVAL;
428
429 q->t_c = ktime_get_ns();
430 qdisc_watchdog_init(&q->watchdog, sch);
431 q->qdisc = &noop_qdisc;
432
433 return tbf_change(sch, opt);
434}
435
436static void tbf_destroy(struct Qdisc *sch)
437{
438 struct tbf_sched_data *q = qdisc_priv(sch);
439
440 qdisc_watchdog_cancel(&q->watchdog);
441 qdisc_destroy(q->qdisc);
442}
443
444static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
445{
446 struct tbf_sched_data *q = qdisc_priv(sch);
447 struct nlattr *nest;
448 struct tc_tbf_qopt opt;
449
450 sch->qstats.backlog = q->qdisc->qstats.backlog;
451 nest = nla_nest_start(skb, TCA_OPTIONS);
452 if (nest == NULL)
453 goto nla_put_failure;
454
455 opt.limit = q->limit;
456 psched_ratecfg_getrate(&opt.rate, &q->rate);
457 if (tbf_peak_present(q))
458 psched_ratecfg_getrate(&opt.peakrate, &q->peak);
459 else
460 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
461 opt.mtu = PSCHED_NS2TICKS(q->mtu);
462 opt.buffer = PSCHED_NS2TICKS(q->buffer);
463 if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
464 goto nla_put_failure;
465 if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
466 nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
467 TCA_TBF_PAD))
468 goto nla_put_failure;
469 if (tbf_peak_present(q) &&
470 q->peak.rate_bytes_ps >= (1ULL << 32) &&
471 nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
472 TCA_TBF_PAD))
473 goto nla_put_failure;
474
475 return nla_nest_end(skb, nest);
476
477nla_put_failure:
478 nla_nest_cancel(skb, nest);
479 return -1;
480}
481
482static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
483 struct sk_buff *skb, struct tcmsg *tcm)
484{
485 struct tbf_sched_data *q = qdisc_priv(sch);
486
487 tcm->tcm_handle |= TC_H_MIN(1);
488 tcm->tcm_info = q->qdisc->handle;
489
490 return 0;
491}
492
493static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
494 struct Qdisc **old)
495{
496 struct tbf_sched_data *q = qdisc_priv(sch);
497
498 if (new == NULL)
499 new = &noop_qdisc;
500
501 *old = qdisc_replace(sch, new, &q->qdisc);
502 return 0;
503}
504
505static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
506{
507 struct tbf_sched_data *q = qdisc_priv(sch);
508 return q->qdisc;
509}
510
511static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
512{
513 return 1;
514}
515
516static void tbf_put(struct Qdisc *sch, unsigned long arg)
517{
518}
519
520static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
521{
522 if (!walker->stop) {
523 if (walker->count >= walker->skip)
524 if (walker->fn(sch, 1, walker) < 0) {
525 walker->stop = 1;
526 return;
527 }
528 walker->count++;
529 }
530}
531
532static const struct Qdisc_class_ops tbf_class_ops = {
533 .graft = tbf_graft,
534 .leaf = tbf_leaf,
535 .get = tbf_get,
536 .put = tbf_put,
537 .walk = tbf_walk,
538 .dump = tbf_dump_class,
539};
540
541static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
542 .next = NULL,
543 .cl_ops = &tbf_class_ops,
544 .id = "tbf",
545 .priv_size = sizeof(struct tbf_sched_data),
546 .enqueue = tbf_enqueue,
547 .dequeue = tbf_dequeue,
548 .peek = qdisc_peek_dequeued,
549 .init = tbf_init,
550 .reset = tbf_reset,
551 .destroy = tbf_destroy,
552 .change = tbf_change,
553 .dump = tbf_dump,
554 .owner = THIS_MODULE,
555};
556
557static int __init tbf_module_init(void)
558{
559 return register_qdisc(&tbf_qdisc_ops);
560}
561
562static void __exit tbf_module_exit(void)
563{
564 unregister_qdisc(&tbf_qdisc_ops);
565}
566module_init(tbf_module_init)
567module_exit(tbf_module_exit)
568MODULE_LICENSE("GPL");
569