1
2
3
4
5
6
7
8
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/string.h>
13#include <linux/errno.h>
14#include <linux/errqueue.h>
15#include <linux/rbtree.h>
16#include <linux/skbuff.h>
17#include <linux/posix-timers.h>
18#include <net/netlink.h>
19#include <net/sch_generic.h>
20#include <net/pkt_sched.h>
21#include <net/sock.h>
22
/*
 * Flag-bit tests. Each macro takes a pointer to an object with a `flags`
 * member (used in this file with struct tc_etf_qopt) and evaluates to the
 * masked bit(s): non-zero when the option is set, zero otherwise. The result
 * is not normalised to 0/1.
 */
#define DEADLINE_MODE_IS_ON(x)		((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x)		((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x)	((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
26
27struct etf_sched_data {
28 bool offload;
29 bool deadline_mode;
30 bool skip_sock_check;
31 int clockid;
32 int queue;
33 s32 delta;
34 ktime_t last;
35 struct rb_root_cached head;
36 struct qdisc_watchdog watchdog;
37 ktime_t (*get_time)(void);
38};
39
40static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
41 [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
42};
43
44static inline int validate_input_params(struct tc_etf_qopt *qopt,
45 struct netlink_ext_ack *extack)
46{
47
48
49
50
51
52
53
54
55
56
57 if (qopt->clockid < 0) {
58 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
59 return -ENOTSUPP;
60 }
61
62 if (qopt->clockid != CLOCK_TAI) {
63 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
64 return -EINVAL;
65 }
66
67 if (qopt->delta < 0) {
68 NL_SET_ERR_MSG(extack, "Delta must be positive");
69 return -EINVAL;
70 }
71
72 return 0;
73}
74
75static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
76{
77 struct etf_sched_data *q = qdisc_priv(sch);
78 ktime_t txtime = nskb->tstamp;
79 struct sock *sk = nskb->sk;
80 ktime_t now;
81
82 if (q->skip_sock_check)
83 goto skip;
84
85 if (!sk || !sk_fullsock(sk))
86 return false;
87
88 if (!sock_flag(sk, SOCK_TXTIME))
89 return false;
90
91
92
93
94 if (sk->sk_clockid != q->clockid)
95 return false;
96
97 if (sk->sk_txtime_deadline_mode != q->deadline_mode)
98 return false;
99
100skip:
101 now = q->get_time();
102 if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
103 return false;
104
105 return true;
106}
107
108static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
109{
110 struct etf_sched_data *q = qdisc_priv(sch);
111 struct rb_node *p;
112
113 p = rb_first_cached(&q->head);
114 if (!p)
115 return NULL;
116
117 return rb_to_skb(p);
118}
119
120static void reset_watchdog(struct Qdisc *sch)
121{
122 struct etf_sched_data *q = qdisc_priv(sch);
123 struct sk_buff *skb = etf_peek_timesortedlist(sch);
124 ktime_t next;
125
126 if (!skb) {
127 qdisc_watchdog_cancel(&q->watchdog);
128 return;
129 }
130
131 next = ktime_sub_ns(skb->tstamp, q->delta);
132 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
133}
134
135static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
136{
137 struct sock_exterr_skb *serr;
138 struct sk_buff *clone;
139 ktime_t txtime = skb->tstamp;
140 struct sock *sk = skb->sk;
141
142 if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
143 return;
144
145 clone = skb_clone(skb, GFP_ATOMIC);
146 if (!clone)
147 return;
148
149 serr = SKB_EXT_ERR(clone);
150 serr->ee.ee_errno = err;
151 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
152 serr->ee.ee_type = 0;
153 serr->ee.ee_code = code;
154 serr->ee.ee_pad = 0;
155 serr->ee.ee_data = (txtime >> 32);
156 serr->ee.ee_info = txtime;
157
158 if (sock_queue_err_skb(sk, clone))
159 kfree_skb(clone);
160}
161
162static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
163 struct sk_buff **to_free)
164{
165 struct etf_sched_data *q = qdisc_priv(sch);
166 struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
167 ktime_t txtime = nskb->tstamp;
168 bool leftmost = true;
169
170 if (!is_packet_valid(sch, nskb)) {
171 report_sock_error(nskb, EINVAL,
172 SO_EE_CODE_TXTIME_INVALID_PARAM);
173 return qdisc_drop(nskb, sch, to_free);
174 }
175
176 while (*p) {
177 struct sk_buff *skb;
178
179 parent = *p;
180 skb = rb_to_skb(parent);
181 if (ktime_compare(txtime, skb->tstamp) >= 0) {
182 p = &parent->rb_right;
183 leftmost = false;
184 } else {
185 p = &parent->rb_left;
186 }
187 }
188 rb_link_node(&nskb->rbnode, parent, p);
189 rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
190
191 qdisc_qstats_backlog_inc(sch, nskb);
192 sch->q.qlen++;
193
194
195 reset_watchdog(sch);
196
197 return NET_XMIT_SUCCESS;
198}
199
200static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
201 ktime_t now)
202{
203 struct etf_sched_data *q = qdisc_priv(sch);
204 struct sk_buff *to_free = NULL;
205 struct sk_buff *tmp = NULL;
206
207 skb_rbtree_walk_from_safe(skb, tmp) {
208 if (ktime_after(skb->tstamp, now))
209 break;
210
211 rb_erase_cached(&skb->rbnode, &q->head);
212
213
214
215
216 skb->next = NULL;
217 skb->prev = NULL;
218 skb->dev = qdisc_dev(sch);
219
220 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
221
222 qdisc_qstats_backlog_dec(sch, skb);
223 qdisc_drop(skb, sch, &to_free);
224 qdisc_qstats_overlimit(sch);
225 sch->q.qlen--;
226 }
227
228 kfree_skb_list(to_free);
229}
230
231static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
232{
233 struct etf_sched_data *q = qdisc_priv(sch);
234
235 rb_erase_cached(&skb->rbnode, &q->head);
236
237
238
239
240 skb->next = NULL;
241 skb->prev = NULL;
242 skb->dev = qdisc_dev(sch);
243
244 qdisc_qstats_backlog_dec(sch, skb);
245
246 qdisc_bstats_update(sch, skb);
247
248 q->last = skb->tstamp;
249
250 sch->q.qlen--;
251}
252
253static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
254{
255 struct etf_sched_data *q = qdisc_priv(sch);
256 struct sk_buff *skb;
257 ktime_t now, next;
258
259 skb = etf_peek_timesortedlist(sch);
260 if (!skb)
261 return NULL;
262
263 now = q->get_time();
264
265
266 if (ktime_before(skb->tstamp, now)) {
267 timesortedlist_drop(sch, skb, now);
268 skb = NULL;
269 goto out;
270 }
271
272
273
274
275 if (q->deadline_mode) {
276 timesortedlist_remove(sch, skb);
277 skb->tstamp = now;
278 goto out;
279 }
280
281 next = ktime_sub_ns(skb->tstamp, q->delta);
282
283
284 if (ktime_after(now, next))
285 timesortedlist_remove(sch, skb);
286 else
287 skb = NULL;
288
289out:
290
291 reset_watchdog(sch);
292
293 return skb;
294}
295
296static void etf_disable_offload(struct net_device *dev,
297 struct etf_sched_data *q)
298{
299 struct tc_etf_qopt_offload etf = { };
300 const struct net_device_ops *ops;
301 int err;
302
303 if (!q->offload)
304 return;
305
306 ops = dev->netdev_ops;
307 if (!ops->ndo_setup_tc)
308 return;
309
310 etf.queue = q->queue;
311 etf.enable = 0;
312
313 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
314 if (err < 0)
315 pr_warn("Couldn't disable ETF offload for queue %d\n",
316 etf.queue);
317}
318
319static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
320 struct netlink_ext_ack *extack)
321{
322 const struct net_device_ops *ops = dev->netdev_ops;
323 struct tc_etf_qopt_offload etf = { };
324 int err;
325
326 if (q->offload)
327 return 0;
328
329 if (!ops->ndo_setup_tc) {
330 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
331 return -EOPNOTSUPP;
332 }
333
334 etf.queue = q->queue;
335 etf.enable = 1;
336
337 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
338 if (err < 0) {
339 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
340 return err;
341 }
342
343 return 0;
344}
345
346static int etf_init(struct Qdisc *sch, struct nlattr *opt,
347 struct netlink_ext_ack *extack)
348{
349 struct etf_sched_data *q = qdisc_priv(sch);
350 struct net_device *dev = qdisc_dev(sch);
351 struct nlattr *tb[TCA_ETF_MAX + 1];
352 struct tc_etf_qopt *qopt;
353 int err;
354
355 if (!opt) {
356 NL_SET_ERR_MSG(extack,
357 "Missing ETF qdisc options which are mandatory");
358 return -EINVAL;
359 }
360
361 err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
362 extack);
363 if (err < 0)
364 return err;
365
366 if (!tb[TCA_ETF_PARMS]) {
367 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
368 return -EINVAL;
369 }
370
371 qopt = nla_data(tb[TCA_ETF_PARMS]);
372
373 pr_debug("delta %d clockid %d offload %s deadline %s\n",
374 qopt->delta, qopt->clockid,
375 OFFLOAD_IS_ON(qopt) ? "on" : "off",
376 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
377
378 err = validate_input_params(qopt, extack);
379 if (err < 0)
380 return err;
381
382 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
383
384 if (OFFLOAD_IS_ON(qopt)) {
385 err = etf_enable_offload(dev, q, extack);
386 if (err < 0)
387 return err;
388 }
389
390
391 q->delta = qopt->delta;
392 q->clockid = qopt->clockid;
393 q->offload = OFFLOAD_IS_ON(qopt);
394 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
395 q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
396
397 switch (q->clockid) {
398 case CLOCK_REALTIME:
399 q->get_time = ktime_get_real;
400 break;
401 case CLOCK_MONOTONIC:
402 q->get_time = ktime_get;
403 break;
404 case CLOCK_BOOTTIME:
405 q->get_time = ktime_get_boottime;
406 break;
407 case CLOCK_TAI:
408 q->get_time = ktime_get_clocktai;
409 break;
410 default:
411 NL_SET_ERR_MSG(extack, "Clockid is not supported");
412 return -ENOTSUPP;
413 }
414
415 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
416
417 return 0;
418}
419
420static void timesortedlist_clear(struct Qdisc *sch)
421{
422 struct etf_sched_data *q = qdisc_priv(sch);
423 struct rb_node *p = rb_first_cached(&q->head);
424
425 while (p) {
426 struct sk_buff *skb = rb_to_skb(p);
427
428 p = rb_next(p);
429
430 rb_erase_cached(&skb->rbnode, &q->head);
431 rtnl_kfree_skbs(skb, skb);
432 sch->q.qlen--;
433 }
434}
435
436static void etf_reset(struct Qdisc *sch)
437{
438 struct etf_sched_data *q = qdisc_priv(sch);
439
440
441 if (q->watchdog.qdisc == sch)
442 qdisc_watchdog_cancel(&q->watchdog);
443
444
445 timesortedlist_clear(sch);
446 __qdisc_reset_queue(&sch->q);
447
448 sch->qstats.backlog = 0;
449 sch->q.qlen = 0;
450
451 q->last = 0;
452}
453
454static void etf_destroy(struct Qdisc *sch)
455{
456 struct etf_sched_data *q = qdisc_priv(sch);
457 struct net_device *dev = qdisc_dev(sch);
458
459
460 if (q->watchdog.qdisc == sch)
461 qdisc_watchdog_cancel(&q->watchdog);
462
463 etf_disable_offload(dev, q);
464}
465
466static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
467{
468 struct etf_sched_data *q = qdisc_priv(sch);
469 struct tc_etf_qopt opt = { };
470 struct nlattr *nest;
471
472 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
473 if (!nest)
474 goto nla_put_failure;
475
476 opt.delta = q->delta;
477 opt.clockid = q->clockid;
478 if (q->offload)
479 opt.flags |= TC_ETF_OFFLOAD_ON;
480
481 if (q->deadline_mode)
482 opt.flags |= TC_ETF_DEADLINE_MODE_ON;
483
484 if (q->skip_sock_check)
485 opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
486
487 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
488 goto nla_put_failure;
489
490 return nla_nest_end(skb, nest);
491
492nla_put_failure:
493 nla_nest_cancel(skb, nest);
494 return -1;
495}
496
497static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
498 .id = "etf",
499 .priv_size = sizeof(struct etf_sched_data),
500 .enqueue = etf_enqueue_timesortedlist,
501 .dequeue = etf_dequeue_timesortedlist,
502 .peek = etf_peek_timesortedlist,
503 .init = etf_init,
504 .reset = etf_reset,
505 .destroy = etf_destroy,
506 .dump = etf_dump,
507 .owner = THIS_MODULE,
508};
509
510static int __init etf_module_init(void)
511{
512 return register_qdisc(&etf_qdisc_ops);
513}
514
515static void __exit etf_module_exit(void)
516{
517 unregister_qdisc(&etf_qdisc_ops);
518}
519module_init(etf_module_init)
520module_exit(etf_module_exit)
521MODULE_LICENSE("GPL");
522