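/*
 * Token Bucket Filter (TBF) queueing discipline.
 *
 * Shapes traffic to a configured rate (with an optional peak rate) by
 * metering packets against a bucket of tokens that refills at the
 * target rate.
 */
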
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
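
/*
 * Token bucket model (summary).
 *
 * A flow conforms to a token bucket of rate R (bytes/sec) and depth
 * B (bytes) if over any interval t_i..t_f it transmits no more than
 * B + R * (t_f - t_i) bytes.  This implementation keeps the bucket
 * in nanoseconds of transmission time: q->tokens accrues elapsed
 * time (capped at q->buffer), and each departing packet consumes its
 * length converted to time at rate R.  With a peak rate P configured,
 * a second bucket (q->ptokens, capped at q->mtu) bounds bursts on a
 * shorter time scale, and a packet may leave only when both buckets
 * are non-negative.
 *
 * Typical userspace setup via tc-tbf(8), for example:
 *
 *      tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb latency 70ms
 *
 * The limit/latency parameter only sizes the default bfifo child
 * qdisc; it no longer applies once a different inner qdisc is
 * grafted.
 */
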
struct tbf_sched_data {
/* Parameters */
        u32 limit;                      /* Maximal length of backlog: bytes */
        u32 max_size;                   /* Largest packet that can conform: bytes */
        s64 buffer;                     /* Token bucket depth, in ns of tx time */
        s64 mtu;                        /* Peak-rate bucket depth, in ns of tx time */
        struct psched_ratecfg rate;
        struct psched_ratecfg peak;

/* Variables */
        s64 tokens;                     /* Current number of rate tokens, in ns */
        s64 ptokens;                    /* Current number of peak-rate tokens, in ns */
        s64 t_c;                        /* Time check-point */
        struct Qdisc *qdisc;            /* Inner qdisc, default - bfifo queue */
        struct qdisc_watchdog watchdog; /* Watchdog timer */
};
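/* Time to length: how many bytes can be sent at rate r in time_in_ns
 * nanoseconds, accounting for ATM cell framing and the configured
 * per-packet overhead.  Roughly the inverse of psched_l2t_ns().
 */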
static u64 psched_ns_t2l(const struct psched_ratecfg *r,
                         u64 time_in_ns)
{
        /* The formula is:
         * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
         */
        u64 len = time_in_ns * r->rate_bytes_ps;

        do_div(len, NSEC_PER_SEC);

        if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
                /* 53-byte ATM cells carry only 48 bytes of payload */
                do_div(len, 53);
                len = len * 48;
        }

        if (len > r->overhead)
                len -= r->overhead;
        else
                len = 0;

        return len;
}
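/* GSO packet is too big for the bucket: segment it in software and
 * enqueue the segments individually, so that each one can conform to
 * max_size on its own.
 */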
static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *segs, *nskb;
        netdev_features_t features = netif_skb_features(skb);
        unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
        int ret, nb;

        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

        if (IS_ERR_OR_NULL(segs))
                return qdisc_drop(skb, sch, to_free);

        nb = 0;
        while (segs) {
                nskb = segs->next;
                segs->next = NULL;
                qdisc_skb_cb(segs)->pkt_len = segs->len;
                len += segs->len;
                ret = qdisc_enqueue(segs, q->qdisc, to_free);
                if (ret != NET_XMIT_SUCCESS) {
                        if (net_xmit_drop_count(ret))
                                qdisc_qstats_drop(sch);
                } else {
                        nb++;
                }
                segs = nskb;
        }
        sch->q.qlen += nb;
        /* One skb went in, nb segments came out: fix up the parents'
         * qlen and backlog accounting accordingly.
         */
        if (nb > 1)
                qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
        consume_skb(skb);
        return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
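/* Enqueue into the inner qdisc.  Packets larger than max_size can
 * never earn enough tokens and are dropped, unless they are GSO
 * packets whose individual segments fit.
 */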
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        int ret;

        if (qdisc_pkt_len(skb) > q->max_size) {
                if (skb_is_gso(skb) &&
                    skb_gso_validate_mac_len(skb, q->max_size))
                        return tbf_segment(skb, sch, to_free);
                return qdisc_drop(skb, sch, to_free);
        }
        ret = qdisc_enqueue(skb, q->qdisc, to_free);
        if (ret != NET_XMIT_SUCCESS) {
                if (net_xmit_drop_count(ret))
                        qdisc_qstats_drop(sch);
                return ret;
        }

        qdisc_qstats_backlog_inc(sch, skb);
        sch->q.qlen++;
        return NET_XMIT_SUCCESS;
}

static bool tbf_peak_present(const struct tbf_sched_data *q)
{
        return q->peak.rate_bytes_ps;
}
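/* Dequeue: peek at the head packet and release it only if the rate
 * bucket (and, if configured, the peak-rate bucket) holds enough
 * tokens; otherwise arm the watchdog for the earliest conforming
 * time and report an overlimit.
 */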
static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        skb = q->qdisc->ops->peek(q->qdisc);

        if (skb) {
                s64 now;
                s64 toks;
                s64 ptoks = 0;
                unsigned int len = qdisc_pkt_len(skb);

                now = ktime_get_ns();
                toks = min_t(s64, now - q->t_c, q->buffer);

                if (tbf_peak_present(q)) {
                        ptoks = toks + q->ptokens;
                        if (ptoks > q->mtu)
                                ptoks = q->mtu;
                        ptoks -= (s64) psched_l2t_ns(&q->peak, len);
                }
                toks += q->tokens;
                if (toks > q->buffer)
                        toks = q->buffer;
                toks -= (s64) psched_l2t_ns(&q->rate, len);

                if ((toks|ptoks) >= 0) {
                        skb = qdisc_dequeue_peeked(q->qdisc);
                        if (unlikely(!skb))
                                return NULL;

                        q->t_c = now;
                        q->tokens = toks;
                        q->ptokens = ptoks;
                        qdisc_qstats_backlog_dec(sch, skb);
                        sch->q.qlen--;
                        qdisc_bstats_update(sch, skb);
                        return skb;
                }

                qdisc_watchdog_schedule_ns(&q->watchdog,
                                           now + max_t(long, -toks, -ptoks));

                /* Maybe a shorter packet further back in the queue
                 * could be sent now.  Tempting, but wrong in
                 * principle: packets must not be reordered under
                 * these circumstances.
                 */

                qdisc_qstats_overlimit(sch);
        }
        return NULL;
}
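/* Return to the initial state: inner queue flushed, buckets refilled. */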
static void tbf_reset(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        q->t_c = ktime_get_ns();
        q->tokens = q->buffer;
        q->ptokens = q->mtu;
        qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
        [TCA_TBF_PARMS]   = { .len = sizeof(struct tc_tbf_qopt) },
        [TCA_TBF_RTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_PTAB]    = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
        [TCA_TBF_RATE64]  = { .type = NLA_U64 },
        [TCA_TBF_PRATE64] = { .type = NLA_U64 },
        [TCA_TBF_BURST]   = { .type = NLA_U32 },
        [TCA_TBF_PBURST]  = { .type = NLA_U32 },
};
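/* Parse the netlink configuration and (re)program the qdisc.  Used
 * both for the initial setup from tbf_init() and for later
 * "tc qdisc change" requests.
 */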
static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        int err;
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_TBF_MAX + 1];
        struct tc_tbf_qopt *qopt;
        struct Qdisc *child = NULL;
        struct psched_ratecfg rate;
        struct psched_ratecfg peak;
        u64 max_size;
        s64 buffer, mtu;
        u64 rate64 = 0, prate64 = 0;

        err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL);
        if (err < 0)
                return err;

        err = -EINVAL;
        if (tb[TCA_TBF_PARMS] == NULL)
                goto done;

        qopt = nla_data(tb[TCA_TBF_PARMS]);
        /* With an old userspace that did not flag the link layer,
         * derive it from the legacy rate table, then drop the table.
         */
        if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
                                              tb[TCA_TBF_RTAB],
                                              NULL));

        if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
                qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
                                              tb[TCA_TBF_PTAB],
                                              NULL));

        /* Legacy fields are in psched ticks; convert to ns and clamp
         * to 32 bits.
         */
        buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
        mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);

        if (tb[TCA_TBF_RATE64])
                rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
        psched_ratecfg_precompute(&rate, &qopt->rate, rate64);

        if (tb[TCA_TBF_BURST]) {
                max_size = nla_get_u32(tb[TCA_TBF_BURST]);
                buffer = psched_l2t_ns(&rate, max_size);
        } else {
                max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
        }

        if (qopt->peakrate.rate) {
                if (tb[TCA_TBF_PRATE64])
                        prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
                psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
                if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
                        pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equal to rate %llu !\n",
                                            peak.rate_bytes_ps, rate.rate_bytes_ps);
                        err = -EINVAL;
                        goto done;
                }

                if (tb[TCA_TBF_PBURST]) {
                        u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
                        max_size = min_t(u32, max_size, pburst);
                        mtu = psched_l2t_ns(&peak, pburst);
                } else {
                        max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
                }
        } else {
                memset(&peak, 0, sizeof(peak));
        }

        if (max_size < psched_mtu(qdisc_dev(sch)))
                pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
                                    max_size, qdisc_dev(sch)->name,
                                    psched_mtu(qdisc_dev(sch)));

        if (!max_size) {
                err = -EINVAL;
                goto done;
        }

        if (q->qdisc != &noop_qdisc) {
                err = fifo_set_limit(q->qdisc, qopt->limit);
                if (err)
                        goto done;
        } else if (qopt->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
                                         extack);
                if (IS_ERR(child)) {
                        err = PTR_ERR(child);
                        goto done;
                }

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);
        if (child) {
                qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
                                          q->qdisc->qstats.backlog);
                qdisc_destroy(q->qdisc);
                q->qdisc = child;
        }
        q->limit = qopt->limit;
        if (tb[TCA_TBF_PBURST])
                q->mtu = mtu;
        else
                q->mtu = PSCHED_TICKS2NS(qopt->mtu);
        q->max_size = max_size;
        if (tb[TCA_TBF_BURST])
                q->buffer = buffer;
        else
                q->buffer = PSCHED_TICKS2NS(qopt->buffer);
        q->tokens = q->buffer;
        q->ptokens = q->mtu;

        memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
        memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));

        sch_tree_unlock(sch);
        err = 0;
done:
        return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_init(&q->watchdog, sch);
        q->qdisc = &noop_qdisc;

        if (!opt)
                return -EINVAL;

        q->t_c = ktime_get_ns();

        return tbf_change(sch, opt, extack);
}

static void tbf_destroy(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        qdisc_watchdog_cancel(&q->watchdog);
        qdisc_destroy(q->qdisc);
}
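/* Dump the configuration to userspace.  Rates that do not fit the
 * legacy 32-bit fields of tc_tbf_qopt are additionally exported via
 * the 64-bit TCA_TBF_RATE64/TCA_TBF_PRATE64 attributes.
 */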
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_tbf_qopt opt;

        sch->qstats.backlog = q->qdisc->qstats.backlog;
        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        opt.limit = q->limit;
        psched_ratecfg_getrate(&opt.rate, &q->rate);
        if (tbf_peak_present(q))
                psched_ratecfg_getrate(&opt.peakrate, &q->peak);
        else
                memset(&opt.peakrate, 0, sizeof(opt.peakrate));
        opt.mtu = PSCHED_NS2TICKS(q->mtu);
        opt.buffer = PSCHED_NS2TICKS(q->buffer);
        if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
        if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
                              TCA_TBF_PAD))
                goto nla_put_failure;
        if (tbf_peak_present(q) &&
            q->peak.rate_bytes_ps >= (1ULL << 32) &&
            nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
                              TCA_TBF_PAD))
                goto nla_put_failure;

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}
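/* TBF is only nominally classful: it exposes a single pseudo-class
 * (minor 1) through which the inner qdisc is grafted, walked and
 * dumped.
 */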
static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;

        return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);
        return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct tbf_sched_data *q = qdisc_priv(sch);

        return q->qdisc;
}

static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops tbf_class_ops = {
        .graft  = tbf_graft,
        .leaf   = tbf_leaf,
        .find   = tbf_find,
        .walk   = tbf_walk,
        .dump   = tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
        .next           = NULL,
        .cl_ops         = &tbf_class_ops,
        .id             = "tbf",
        .priv_size      = sizeof(struct tbf_sched_data),
        .enqueue        = tbf_enqueue,
        .dequeue        = tbf_dequeue,
        .peek           = qdisc_peek_dequeued,
        .init           = tbf_init,
        .reset          = tbf_reset,
        .destroy        = tbf_destroy,
        .change         = tbf_change,
        .dump           = tbf_dump,
        .owner          = THIS_MODULE,
};

static int __init tbf_module_init(void)
{
        return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
        unregister_qdisc(&tbf_qdisc_ops);
}

module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");