// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_taprio.c	 Time Aware Priority Scheduler
 *
 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>

static LIST_HEAD(taprio_list);
static DEFINE_SPINLOCK(taprio_list_lock);

#define TAPRIO_ALL_GATES_OPEN -1

struct sched_entry {
	struct list_head list;

	/* The instant that this entry "closes" and the next one
	 * should open, the qdisc will make some effort so that no
	 * packet leaves after this time.
	 */
	ktime_t close_time;
	atomic_t budget;
	int index;
	u32 gate_mask;
	u32 interval;
	u8 command;
};

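/* A schedule ("gate control list") lives in one of two slots: the currently
 * running one (oper_sched) and a pending one (admin_sched) that becomes
 * operational once its base_time is reached. Readers access both via RCU.
 */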
struct sched_gate_list {
	struct rcu_head rcu;
	struct list_head entries;
	size_t num_entries;
	ktime_t cycle_close_time;
	s64 cycle_time;
	s64 cycle_time_extension;
	s64 base_time;
};

struct taprio_sched {
	struct Qdisc **qdiscs;
	struct Qdisc *root;
	int clockid;
	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
				    * speeds it's sub-nanoseconds per byte
				    */

	/* Protects the update side of the RCU protected current_entry */
	spinlock_t current_entry_lock;
	struct sched_entry __rcu *current_entry;
	struct sched_gate_list __rcu *oper_sched;
	struct sched_gate_list __rcu *admin_sched;
	ktime_t (*get_time)(void);
	struct hrtimer advance_timer;
	struct list_head taprio_list;
};

static ktime_t sched_base_time(const struct sched_gate_list *sched)
{
	if (!sched)
		return KTIME_MAX;

	return ns_to_ktime(sched->base_time);
}

static void taprio_free_sched_cb(struct rcu_head *head)
{
	struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
	struct sched_entry *entry, *n;

	if (!sched)
		return;

	list_for_each_entry_safe(entry, n, &sched->entries, list) {
		list_del(&entry->list);
		kfree(entry);
	}

	kfree(sched);
}

static void switch_schedules(struct taprio_sched *q,
			     struct sched_gate_list **admin,
			     struct sched_gate_list **oper)
{
	rcu_assign_pointer(q->oper_sched, *admin);
	rcu_assign_pointer(q->admin_sched, NULL);

	if (*oper)
		call_rcu(&(*oper)->rcu, taprio_free_sched_cb);

	*oper = *admin;
	*admin = NULL;
}

static ktime_t get_cycle_time(struct sched_gate_list *sched)
{
	struct sched_entry *entry;
	ktime_t cycle = 0;

	if (sched->cycle_time != 0)
		return sched->cycle_time;

	list_for_each_entry(entry, &sched->entries, list)
		cycle = ktime_add_ns(cycle, entry->interval);

	sched->cycle_time = cycle;

	return cycle;
}

static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct Qdisc *child;
	int queue;

	queue = skb_get_queue_mapping(skb);

	child = q->qdiscs[queue];
	if (unlikely(!child))
		return qdisc_drop(skb, sch, to_free);

	qdisc_qstats_backlog_inc(sch, skb);
	sch->q.qlen++;

	return qdisc_enqueue(skb, child, to_free);
}

static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry;
	struct sk_buff *skb;
	u32 gate_mask;
	int i;

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
	rcu_read_unlock();

	if (!gate_mask)
		return NULL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		int prio;
		u8 tc;

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc)))
			continue;

		return skb;
	}

	return NULL;
}

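/* Transmission times are derived from the link speed: picos_per_byte is the
 * time (in picoseconds) needed to put one byte on the wire, so
 * length_to_duration() converts a packet length to nanoseconds and
 * taprio_set_budget() converts an entry's interval into a byte budget.
 */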
static inline int length_to_duration(struct taprio_sched *q, int len)
{
	return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
}

static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
{
	atomic_set(&entry->budget,
		   div64_u64((u64)entry->interval * 1000,
			     atomic64_read(&q->picos_per_byte)));
}

static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sk_buff *skb = NULL;
	struct sched_entry *entry;
	u32 gate_mask;
	int i;

	if (atomic64_read(&q->picos_per_byte) == -1) {
		WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte.");
		return NULL;
	}

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	/* if there's no entry, it means that the schedule didn't
	 * start yet, so force all gates to be open, this is in
	 * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
	 * "AdminGateStates"
	 */
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;

	if (!gate_mask)
		goto done;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		ktime_t guard;
		int prio;
		int len;
		u8 tc;

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc)))
			continue;

		len = qdisc_pkt_len(skb);
		guard = ktime_add_ns(q->get_time(),
				     length_to_duration(q, len));

		/* In the case that there's no gate entry, there's no
		 * guard band ...
		 */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    ktime_after(guard, entry->close_time))
			continue;

		/* ... and no budget. */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    atomic_sub_return(len, &entry->budget) < 0)
			continue;

		skb = child->ops->dequeue(child);
		if (unlikely(!skb))
			goto done;

		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;

		goto done;
	}

done:
	rcu_read_unlock();

	return skb;
}

static bool should_restart_cycle(const struct sched_gate_list *oper,
				 const struct sched_entry *entry)
{
	if (list_is_last(&entry->list, &oper->entries))
		return true;

	if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
		return true;

	return false;
}

static bool should_change_schedules(const struct sched_gate_list *admin,
				    const struct sched_gate_list *oper,
				    ktime_t close_time)
{
	ktime_t next_base_time, extension_time;

	if (!admin)
		return false;

	next_base_time = sched_base_time(admin);

	/* This is the simple case, the close_time would fall after
	 * the next schedule base_time.
	 */
	if (ktime_compare(next_base_time, close_time) <= 0)
		return true;

	/* This is the cycle_time_extension case, if the close_time
	 * plus the amount that can be extended would fall after the
	 * next schedule base_time, we can extend the current schedule
	 * for that amount.
	 */
	extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);

	/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
	 * how precisely the extension should be made. So after
	 * conformance testing, this logic may change.
	 */
	if (ktime_compare(next_base_time, extension_time) <= 0)
		return true;

	return false;
}

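/* advance_sched() is the hrtimer callback that moves the schedule to the
 * next gate control entry: it computes that entry's close_time, refills its
 * byte budget, publishes it as current_entry and re-arms the timer.
 */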
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
	struct taprio_sched *q = container_of(timer, struct taprio_sched,
					      advance_timer);
	struct sched_gate_list *oper, *admin;
	struct sched_entry *entry, *next;
	struct Qdisc *sch = q->root;
	ktime_t close_time;

	spin_lock(&q->current_entry_lock);
	entry = rcu_dereference_protected(q->current_entry,
					  lockdep_is_held(&q->current_entry_lock));
	oper = rcu_dereference_protected(q->oper_sched,
					 lockdep_is_held(&q->current_entry_lock));
	admin = rcu_dereference_protected(q->admin_sched,
					  lockdep_is_held(&q->current_entry_lock));

	if (!oper)
		switch_schedules(q, &admin, &oper);

	/* This can happen in two cases: 1. this is the very first run
	 * of this function (i.e. we weren't running any schedule
	 * previously); 2. The previous schedule just ended. The first
	 * entry of all schedules are pre-calculated during the
	 * schedule initialization.
	 */
	if (unlikely(!entry || entry->close_time == oper->base_time)) {
		next = list_first_entry(&oper->entries, struct sched_entry,
					list);
		close_time = next->close_time;
		goto first_run;
	}

	if (should_restart_cycle(oper, entry)) {
		next = list_first_entry(&oper->entries, struct sched_entry,
					list);
		oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
						      oper->cycle_time);
	} else {
		next = list_next_entry(entry, list);
	}

	close_time = ktime_add_ns(entry->close_time, next->interval);
	close_time = min_t(ktime_t, close_time, oper->cycle_close_time);

	if (should_change_schedules(admin, oper, close_time)) {
		/* Set things so the next time this runs, the new
		 * schedule runs.
		 */
		close_time = sched_base_time(admin);
		switch_schedules(q, &admin, &oper);
	}

	next->close_time = close_time;
	taprio_set_budget(q, next);

first_run:
	rcu_assign_pointer(q->current_entry, next);
	spin_unlock(&q->current_entry_lock);

	hrtimer_set_expires(&q->advance_timer, close_time);

	rcu_read_lock();
	__netif_schedule(sch);
	rcu_read_unlock();

	return HRTIMER_RESTART;
}

393static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
394 [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 },
395 [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 },
396 [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
397 [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 },
398};
399
400static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = {
401 [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED },
402};
403
404static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
405 [TCA_TAPRIO_ATTR_PRIOMAP] = {
406 .len = sizeof(struct tc_mqprio_qopt)
407 },
408 [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
409 [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
410 [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
411 [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
412 [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
413 [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
414};
415
416static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
417 struct netlink_ext_ack *extack)
418{
419 u32 interval = 0;
420
421 if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
422 entry->command = nla_get_u8(
423 tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
424
425 if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
426 entry->gate_mask = nla_get_u32(
427 tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
428
429 if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
430 interval = nla_get_u32(
431 tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
432
433 if (interval == 0) {
434 NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
435 return -EINVAL;
436 }
437
438 entry->interval = interval;
439
440 return 0;
441}
442
443static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
444 int index, struct netlink_ext_ack *extack)
445{
446 struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
447 int err;
448
449 err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
450 entry_policy, NULL);
451 if (err < 0) {
452 NL_SET_ERR_MSG(extack, "Could not parse nested entry");
453 return -EINVAL;
454 }
455
456 entry->index = index;
457
458 return fill_sched_entry(tb, entry, extack);
459}
460
461static int parse_sched_list(struct nlattr *list,
462 struct sched_gate_list *sched,
463 struct netlink_ext_ack *extack)
464{
465 struct nlattr *n;
466 int err, rem;
467 int i = 0;
468
469 if (!list)
470 return -EINVAL;
471
472 nla_for_each_nested(n, list, rem) {
473 struct sched_entry *entry;
474
475 if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
476 NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
477 continue;
478 }
479
480 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
481 if (!entry) {
482 NL_SET_ERR_MSG(extack, "Not enough memory for entry");
483 return -ENOMEM;
484 }
485
486 err = parse_sched_entry(n, entry, i, extack);
487 if (err < 0) {
488 kfree(entry);
489 return err;
490 }
491
492 list_add_tail(&entry->list, &sched->entries);
493 i++;
494 }
495
496 sched->num_entries = i;
497
498 return i;
499}
500
501static int parse_taprio_schedule(struct nlattr **tb,
502 struct sched_gate_list *new,
503 struct netlink_ext_ack *extack)
504{
505 int err = 0;
506
507 if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
508 NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
509 return -ENOTSUPP;
510 }
511
512 if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
513 new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
514
515 if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
516 new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
517
518 if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
519 new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
520
521 if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
522 err = parse_sched_list(
523 tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
524 if (err < 0)
525 return err;
526
527 return 0;
528}
529
static int taprio_parse_mqprio_opt(struct net_device *dev,
				   struct tc_mqprio_qopt *qopt,
				   struct netlink_ext_ack *extack)
{
	int i, j;

	if (!qopt && !dev->num_tc) {
		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
		return -EINVAL;
	}

	/* If num_tc is already set, it means that the user already
	 * configured the mqprio part
	 */
	if (dev->num_tc)
		return 0;

	/* Verify num_tc is not out of max range */
	if (qopt->num_tc > TC_MAX_QUEUE) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
		return -EINVAL;
	}

	/* taprio imposes that traffic classes map 1:n to tx queues */
	if (qopt->num_tc > dev->num_tx_queues) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
		return -EINVAL;
	}

	/* Verify priority mapping uses valid tcs */
	for (i = 0; i < TC_BITMASK + 1; i++) {
		if (qopt->prio_tc_map[i] >= qopt->num_tc) {
			NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
			return -EINVAL;
		}
	}

	for (i = 0; i < qopt->num_tc; i++) {
		unsigned int last = qopt->offset[i] + qopt->count[i];

		/* Verify the queue count is in tx range being equal to the
		 * real_num_tx_queues indicates the last queue is in use.
		 */
		if (qopt->offset[i] >= dev->num_tx_queues ||
		    !qopt->count[i] ||
		    last > dev->real_num_tx_queues) {
			NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
			return -EINVAL;
		}

		/* Verify that the offset and counts do not overlap */
		for (j = i + 1; j < qopt->num_tc; j++) {
			if (last > qopt->offset[j]) {
				NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int taprio_get_start_time(struct Qdisc *sch,
				 struct sched_gate_list *sched,
				 ktime_t *start)
{
	struct taprio_sched *q = qdisc_priv(sch);
	ktime_t now, base, cycle;
	s64 n;

	base = sched_base_time(sched);
	now = q->get_time();

	if (ktime_after(base, now)) {
		*start = base;
		return 0;
	}

	cycle = get_cycle_time(sched);

	/* The qdisc is expected to have at least one sched_entry.  Moreover,
	 * any entry must have 'interval' > 0. Thus if the cycle time is zero,
	 * something went really wrong. In that case, we should warn about this
	 * inconsistent state and return error.
	 */
	if (WARN_ON(!cycle))
		return -EFAULT;

	/* Schedule the start time for the beginning of the next
	 * cycle.
	 */
	n = div64_s64(ktime_sub_ns(now, base), cycle);
	*start = ktime_add_ns(base, (n + 1) * cycle);
	return 0;
}

static void setup_first_close_time(struct taprio_sched *q,
				   struct sched_gate_list *sched, ktime_t base)
{
	struct sched_entry *first;
	ktime_t cycle;

	first = list_first_entry(&sched->entries,
				 struct sched_entry, list);

	cycle = get_cycle_time(sched);

	/* FIXME: find a better place to do this */
	sched->cycle_close_time = ktime_add_ns(base, cycle);

	first->close_time = ktime_add_ns(base, first->interval);
	taprio_set_budget(q, first);
	rcu_assign_pointer(q->current_entry, NULL);
}

static void taprio_start_sched(struct Qdisc *sch,
			       ktime_t start, struct sched_gate_list *new)
{
	struct taprio_sched *q = qdisc_priv(sch);
	ktime_t expires;

	expires = hrtimer_get_expires(&q->advance_timer);
	if (expires == 0)
		expires = KTIME_MAX;

	/* If the new schedule starts before the next expiration, we
	 * reschedule it here, so we don't miss the first
	 * "transition".
	 */
	start = min_t(ktime_t, start, expires);

	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}

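/* picos_per_byte is (re)computed from the current ethtool link speed, both at
 * configuration time and from the netdevice notifier below, so byte budgets
 * and guard bands track link speed changes.
 */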
static void taprio_set_picos_per_byte(struct net_device *dev,
				      struct taprio_sched *q)
{
	struct ethtool_link_ksettings ecmd;
	int picos_per_byte = -1;

	if (!__ethtool_get_link_ksettings(dev, &ecmd) &&
	    ecmd.base.speed != SPEED_UNKNOWN)
		picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8,
					   ecmd.base.speed * 1000 * 1000);

	atomic64_set(&q->picos_per_byte, picos_per_byte);
	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
		   ecmd.base.speed);
}

static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
			       void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net_device *qdev;
	struct taprio_sched *q;
	bool found = false;

	ASSERT_RTNL();

	if (event != NETDEV_UP && event != NETDEV_CHANGE)
		return NOTIFY_DONE;

	spin_lock(&taprio_list_lock);
	list_for_each_entry(q, &taprio_list, taprio_list) {
		qdev = qdisc_dev(q->root);
		if (qdev == dev) {
			found = true;
			break;
		}
	}
	spin_unlock(&taprio_list_lock);

	if (found)
		taprio_set_picos_per_byte(dev, q);

	return NOTIFY_DONE;
}

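/* taprio_change() parses the netlink request, validates the mqprio mapping
 * and clockid, builds the new admin schedule and arms the advance timer so
 * that schedule becomes operational at its start time.
 */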
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
	struct sched_gate_list *oper, *admin, *new_admin;
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_mqprio_qopt *mqprio = NULL;
	int i, err, clockid;
	unsigned long flags;
	ktime_t start;

	err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt,
			       taprio_policy, extack);
	if (err < 0)
		return err;

	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);

	err = taprio_parse_mqprio_opt(dev, mqprio, extack);
	if (err < 0)
		return err;

	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
	if (!new_admin) {
		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&new_admin->entries);

	rcu_read_lock();
	oper = rcu_dereference(q->oper_sched);
	admin = rcu_dereference(q->admin_sched);
	rcu_read_unlock();

	if (mqprio && (oper || admin)) {
		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
		err = -ENOTSUPP;
		goto free_sched;
	}

	err = parse_taprio_schedule(tb, new_admin, extack);
	if (err < 0)
		goto free_sched;

	if (new_admin->num_entries == 0) {
		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
		err = -EINVAL;
		goto free_sched;
	}

	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);

		/* We only support static clockids and we don't allow
		 * for it to be modified after the first init.
		 */
		if (clockid < 0 ||
		    (q->clockid != -1 && q->clockid != clockid)) {
			NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported");
			err = -ENOTSUPP;
			goto free_sched;
		}

		q->clockid = clockid;
	}

	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
		err = -EINVAL;
		goto free_sched;
	}

	taprio_set_picos_per_byte(dev, q);

	/* Protects against enqueue()/dequeue() */
	spin_lock_bh(qdisc_lock(sch));

	if (!hrtimer_active(&q->advance_timer)) {
		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
		q->advance_timer.function = advance_sched;
	}

	if (mqprio) {
		netdev_set_num_tc(dev, mqprio->num_tc);
		for (i = 0; i < mqprio->num_tc; i++)
			netdev_set_tc_queue(dev, i,
					    mqprio->count[i],
					    mqprio->offset[i]);

		/* Always use supplied priority mappings */
		for (i = 0; i < TC_BITMASK + 1; i++)
			netdev_set_prio_tc_map(dev, i,
					       mqprio->prio_tc_map[i]);
	}

	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
		err = -EINVAL;
		goto unlock;
	}

	err = taprio_get_start_time(sch, new_admin, &start);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
		goto unlock;
	}

	setup_first_close_time(q, new_admin, start);

	/* Protects against advance_sched() */
	spin_lock_irqsave(&q->current_entry_lock, flags);

	taprio_start_sched(sch, start, new_admin);

	rcu_assign_pointer(q->admin_sched, new_admin);
	if (admin)
		call_rcu(&admin->rcu, taprio_free_sched_cb);
	new_admin = NULL;

	spin_unlock_irqrestore(&q->current_entry_lock, flags);

	err = 0;

unlock:
	spin_unlock_bh(qdisc_lock(sch));

free_sched:
	kfree(new_admin);

	return err;
}

static void taprio_destroy(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	unsigned int i;

	spin_lock(&taprio_list_lock);
	list_del(&q->taprio_list);
	spin_unlock(&taprio_list_lock);

	hrtimer_cancel(&q->advance_timer);

	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
			qdisc_put(q->qdiscs[i]);

		kfree(q->qdiscs);
	}
	q->qdiscs = NULL;

	netdev_set_num_tc(dev, 0);

	if (q->oper_sched)
		call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);

	if (q->admin_sched)
		call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
}

static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int i;

	spin_lock_init(&q->current_entry_lock);

	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
	q->advance_timer.function = advance_sched;

	q->root = sch;

	/* We only support static clockids. Use an invalid value as default
	 * and get the valid one on taprio_change().
	 */
	q->clockid = -1;

	if (sch->parent != TC_H_ROOT)
		return -EOPNOTSUPP;

	if (!netif_is_multiqueue(dev))
		return -EOPNOTSUPP;

	/* pre-allocate qdisc, attachment can't fail */
	q->qdiscs = kcalloc(dev->num_tx_queues,
			    sizeof(q->qdiscs[0]),
			    GFP_KERNEL);

	if (!q->qdiscs)
		return -ENOMEM;

	if (!opt)
		return -EINVAL;

	spin_lock(&taprio_list_lock);
	list_add(&q->taprio_list, &taprio_list);
	spin_unlock(&taprio_list_lock);

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		struct Qdisc *qdisc;

		dev_queue = netdev_get_tx_queue(dev, i);
		qdisc = qdisc_create_dflt(dev_queue,
					  &pfifo_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(i + 1)),
					  extack);
		if (!qdisc)
			return -ENOMEM;

		if (i < dev->real_num_tx_queues)
			qdisc_hash_add(qdisc, false);

		q->qdiscs[i] = qdisc;
	}

	return taprio_change(sch, opt, extack);
}

948static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
949 unsigned long cl)
950{
951 struct net_device *dev = qdisc_dev(sch);
952 unsigned long ntx = cl - 1;
953
954 if (ntx >= dev->num_tx_queues)
955 return NULL;
956
957 return netdev_get_tx_queue(dev, ntx);
958}
959
960static int taprio_graft(struct Qdisc *sch, unsigned long cl,
961 struct Qdisc *new, struct Qdisc **old,
962 struct netlink_ext_ack *extack)
963{
964 struct taprio_sched *q = qdisc_priv(sch);
965 struct net_device *dev = qdisc_dev(sch);
966 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
967
968 if (!dev_queue)
969 return -EINVAL;
970
971 if (dev->flags & IFF_UP)
972 dev_deactivate(dev);
973
974 *old = q->qdiscs[cl - 1];
975 q->qdiscs[cl - 1] = new;
976
977 if (new)
978 new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
979
980 if (dev->flags & IFF_UP)
981 dev_activate(dev);
982
983 return 0;
984}
985
986static int dump_entry(struct sk_buff *msg,
987 const struct sched_entry *entry)
988{
989 struct nlattr *item;
990
991 item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY);
992 if (!item)
993 return -ENOSPC;
994
995 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
996 goto nla_put_failure;
997
998 if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
999 goto nla_put_failure;
1000
1001 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
1002 entry->gate_mask))
1003 goto nla_put_failure;
1004
1005 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
1006 entry->interval))
1007 goto nla_put_failure;
1008
1009 return nla_nest_end(msg, item);
1010
1011nla_put_failure:
1012 nla_nest_cancel(msg, item);
1013 return -1;
1014}
1015
1016static int dump_schedule(struct sk_buff *msg,
1017 const struct sched_gate_list *root)
1018{
1019 struct nlattr *entry_list;
1020 struct sched_entry *entry;
1021
1022 if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
1023 root->base_time, TCA_TAPRIO_PAD))
1024 return -1;
1025
1026 if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
1027 root->cycle_time, TCA_TAPRIO_PAD))
1028 return -1;
1029
1030 if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
1031 root->cycle_time_extension, TCA_TAPRIO_PAD))
1032 return -1;
1033
1034 entry_list = nla_nest_start(msg,
1035 TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
1036 if (!entry_list)
1037 goto error_nest;
1038
1039 list_for_each_entry(entry, &root->entries, list) {
1040 if (dump_entry(msg, entry) < 0)
1041 goto error_nest;
1042 }
1043
1044 nla_nest_end(msg, entry_list);
1045 return 0;
1046
1047error_nest:
1048 nla_nest_cancel(msg, entry_list);
1049 return -1;
1050}
1051
1052static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
1053{
1054 struct taprio_sched *q = qdisc_priv(sch);
1055 struct net_device *dev = qdisc_dev(sch);
1056 struct sched_gate_list *oper, *admin;
1057 struct tc_mqprio_qopt opt = { 0 };
1058 struct nlattr *nest, *sched_nest;
1059 unsigned int i;
1060
1061 rcu_read_lock();
1062 oper = rcu_dereference(q->oper_sched);
1063 admin = rcu_dereference(q->admin_sched);
1064
1065 opt.num_tc = netdev_get_num_tc(dev);
1066 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
1067
1068 for (i = 0; i < netdev_get_num_tc(dev); i++) {
1069 opt.count[i] = dev->tc_to_txq[i].count;
1070 opt.offset[i] = dev->tc_to_txq[i].offset;
1071 }
1072
1073 nest = nla_nest_start(skb, TCA_OPTIONS);
1074 if (!nest)
1075 goto start_error;
1076
1077 if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
1078 goto options_error;
1079
1080 if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
1081 goto options_error;
1082
1083 if (oper && dump_schedule(skb, oper))
1084 goto options_error;
1085
1086 if (!admin)
1087 goto done;
1088
1089 sched_nest = nla_nest_start(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
1090 if (!sched_nest)
1091 goto options_error;
1092
1093 if (dump_schedule(skb, admin))
1094 goto admin_error;
1095
1096 nla_nest_end(skb, sched_nest);
1097
1098done:
1099 rcu_read_unlock();
1100
1101 return nla_nest_end(skb, nest);
1102
1103admin_error:
1104 nla_nest_cancel(skb, sched_nest);
1105
1106options_error:
1107 nla_nest_cancel(skb, nest);
1108
1109start_error:
1110 rcu_read_unlock();
1111 return -ENOSPC;
1112}
1113
1114static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
1115{
1116 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
1117
1118 if (!dev_queue)
1119 return NULL;
1120
1121 return dev_queue->qdisc_sleeping;
1122}
1123
1124static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
1125{
1126 unsigned int ntx = TC_H_MIN(classid);
1127
1128 if (!taprio_queue_get(sch, ntx))
1129 return 0;
1130 return ntx;
1131}
1132
1133static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
1134 struct sk_buff *skb, struct tcmsg *tcm)
1135{
1136 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
1137
1138 tcm->tcm_parent = TC_H_ROOT;
1139 tcm->tcm_handle |= TC_H_MIN(cl);
1140 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
1141
1142 return 0;
1143}
1144
1145static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
1146 struct gnet_dump *d)
1147 __releases(d->lock)
1148 __acquires(d->lock)
1149{
1150 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
1151
1152 sch = dev_queue->qdisc_sleeping;
1153 if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
1154 qdisc_qstats_copy(d, sch) < 0)
1155 return -1;
1156 return 0;
1157}
1158
1159static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1160{
1161 struct net_device *dev = qdisc_dev(sch);
1162 unsigned long ntx;
1163
1164 if (arg->stop)
1165 return;
1166
1167 arg->count = arg->skip;
1168 for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
1169 if (arg->fn(sch, ntx + 1, arg) < 0) {
1170 arg->stop = 1;
1171 break;
1172 }
1173 arg->count++;
1174 }
1175}
1176
1177static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
1178 struct tcmsg *tcm)
1179{
1180 return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
1181}
1182
1183static const struct Qdisc_class_ops taprio_class_ops = {
1184 .graft = taprio_graft,
1185 .leaf = taprio_leaf,
1186 .find = taprio_find,
1187 .walk = taprio_walk,
1188 .dump = taprio_dump_class,
1189 .dump_stats = taprio_dump_class_stats,
1190 .select_queue = taprio_select_queue,
1191};
1192
1193static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
1194 .cl_ops = &taprio_class_ops,
1195 .id = "taprio",
1196 .priv_size = sizeof(struct taprio_sched),
1197 .init = taprio_init,
1198 .change = taprio_change,
1199 .destroy = taprio_destroy,
1200 .peek = taprio_peek,
1201 .dequeue = taprio_dequeue,
1202 .enqueue = taprio_enqueue,
1203 .dump = taprio_dump,
1204 .owner = THIS_MODULE,
1205};
1206
1207static struct notifier_block taprio_device_notifier = {
1208 .notifier_call = taprio_dev_notifier,
1209};
1210
1211static int __init taprio_module_init(void)
1212{
1213 int err = register_netdevice_notifier(&taprio_device_notifier);
1214
1215 if (err)
1216 return err;
1217
1218 return register_qdisc(&taprio_qdisc_ops);
1219}
1220
1221static void __exit taprio_module_exit(void)
1222{
1223 unregister_qdisc(&taprio_qdisc_ops);
1224 unregister_netdevice_notifier(&taprio_device_notifier);
1225}
1226
1227module_init(taprio_module_init);
1228module_exit(taprio_module_exit);
1229MODULE_LICENSE("GPL");
1230