// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h>
37
38
/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);
41
/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
53static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
54{
55 const struct netdev_queue *txq = q->dev_queue;
56 spinlock_t *lock = NULL;
57 struct sk_buff *skb;
58
59 if (q->flags & TCQ_F_NOLOCK) {
60 lock = qdisc_lock(q);
61 spin_lock(lock);
62 }
63
64 skb = skb_peek(&q->skb_bad_txq);
65 if (skb) {
		/* check the reason of requeuing without tx lock first */
67 txq = skb_get_tx_queue(txq->dev, skb);
68 if (!netif_xmit_frozen_or_stopped(txq)) {
69 skb = __skb_dequeue(&q->skb_bad_txq);
70 if (qdisc_is_percpu_stats(q)) {
71 qdisc_qstats_cpu_backlog_dec(q, skb);
72 qdisc_qstats_cpu_qlen_dec(q);
73 } else {
74 qdisc_qstats_backlog_dec(q, skb);
75 q->q.qlen--;
76 }
77 } else {
78 skb = NULL;
79 }
80 }
81
82 if (lock)
83 spin_unlock(lock);
84
85 return skb;
86}
87
88static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
89{
90 struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
91
92 if (unlikely(skb))
93 skb = __skb_dequeue_bad_txq(q);
94
95 return skb;
96}
97
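/* Park an skb whose target txq turned out to be frozen/stopped on the
 * qdisc's bad-txq list so it can be retried later; qlen/backlog stats
 * still account for it while it waits there.
 */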
98static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
99 struct sk_buff *skb)
100{
101 spinlock_t *lock = NULL;
102
103 if (q->flags & TCQ_F_NOLOCK) {
104 lock = qdisc_lock(q);
105 spin_lock(lock);
106 }
107
108 __skb_queue_tail(&q->skb_bad_txq, skb);
109
110 if (qdisc_is_percpu_stats(q)) {
111 qdisc_qstats_cpu_backlog_inc(q, skb);
112 qdisc_qstats_cpu_qlen_inc(q);
113 } else {
114 qdisc_qstats_backlog_inc(q, skb);
115 q->q.qlen++;
116 }
117
118 if (lock)
119 spin_unlock(lock);
120}
121
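/* Put an skb (possibly an skb list) back at the head of line on gso_skb and
 * reschedule the qdisc; requeue/backlog/qlen stats are updated per segment.
 */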
122static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
123{
124 spinlock_t *lock = NULL;
125
126 if (q->flags & TCQ_F_NOLOCK) {
127 lock = qdisc_lock(q);
128 spin_lock(lock);
129 }
130
131 while (skb) {
132 struct sk_buff *next = skb->next;
133
134 __skb_queue_tail(&q->gso_skb, skb);
135
		/* it's still part of the queue */
137 if (qdisc_is_percpu_stats(q)) {
138 qdisc_qstats_cpu_requeues_inc(q);
139 qdisc_qstats_cpu_backlog_inc(q, skb);
140 qdisc_qstats_cpu_qlen_inc(q);
141 } else {
142 q->qstats.requeues++;
143 qdisc_qstats_backlog_inc(q, skb);
144 q->q.qlen++;
145 }
146
147 skb = next;
148 }
149 if (lock)
150 spin_unlock(lock);
151 __netif_schedule(q);
152}
153
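/* Opportunistically dequeue more packets from the qdisc, chaining them via
 * skb->next, while the byte budget returned by qdisc_avail_bulklimit() for
 * this txq allows it.
 */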
154static void try_bulk_dequeue_skb(struct Qdisc *q,
155 struct sk_buff *skb,
156 const struct netdev_queue *txq,
157 int *packets)
158{
159 int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
160
161 while (bytelimit > 0) {
162 struct sk_buff *nskb = q->dequeue(q);
163
164 if (!nskb)
165 break;
166
		bytelimit -= nskb->len; /* covers GSO len */
		skb->next = nskb;
		skb = nskb;
		(*packets)++; /* GSO counts as one pkt */
171 }
172 skb_mark_not_on_list(skb);
173}
174
/* This variant of try_bulk_dequeue_skb() makes sure
 * all skbs in the chain are for the same txq
 */
178static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
179 struct sk_buff *skb,
180 int *packets)
181{
182 int mapping = skb_get_queue_mapping(skb);
183 struct sk_buff *nskb;
184 int cnt = 0;
185
186 do {
187 nskb = q->dequeue(q);
188 if (!nskb)
189 break;
190 if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
191 qdisc_enqueue_skb_bad_txq(q, nskb);
192 break;
193 }
194 skb->next = nskb;
195 skb = nskb;
196 } while (++cnt < 8);
197 (*packets) += cnt;
198 skb_mark_not_on_list(skb);
199}
200
/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
 * A requeued skb (via q->gso_skb) can also be a SKB list.
 */
204static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
205 int *packets)
206{
207 const struct netdev_queue *txq = q->dev_queue;
208 struct sk_buff *skb = NULL;
209
210 *packets = 1;
211 if (unlikely(!skb_queue_empty(&q->gso_skb))) {
212 spinlock_t *lock = NULL;
213
214 if (q->flags & TCQ_F_NOLOCK) {
215 lock = qdisc_lock(q);
216 spin_lock(lock);
217 }
218
219 skb = skb_peek(&q->gso_skb);

		/* skb may be null if another cpu pulls gso_skb off in between
		 * empty check and lock.
		 */
224 if (!skb) {
225 if (lock)
226 spin_unlock(lock);
227 goto validate;
228 }
229

		/* skb in gso_skb were already validated */
231 *validate = false;
232 if (xfrm_offload(skb))
233 *validate = true;
234
235 txq = skb_get_tx_queue(txq->dev, skb);
236 if (!netif_xmit_frozen_or_stopped(txq)) {
237 skb = __skb_dequeue(&q->gso_skb);
238 if (qdisc_is_percpu_stats(q)) {
239 qdisc_qstats_cpu_backlog_dec(q, skb);
240 qdisc_qstats_cpu_qlen_dec(q);
241 } else {
242 qdisc_qstats_backlog_dec(q, skb);
243 q->q.qlen--;
244 }
245 } else {
246 skb = NULL;
247 }
248 if (lock)
249 spin_unlock(lock);
250 goto trace;
251 }
252validate:
253 *validate = true;
254
255 if ((q->flags & TCQ_F_ONETXQUEUE) &&
256 netif_xmit_frozen_or_stopped(txq))
257 return skb;
258
259 skb = qdisc_dequeue_skb_bad_txq(q);
260 if (unlikely(skb))
261 goto bulk;
262 skb = q->dequeue(q);
263 if (skb) {
264bulk:
265 if (qdisc_may_bulk(q))
266 try_bulk_dequeue_skb(q, skb, txq, packets);
267 else
268 try_bulk_dequeue_skb_slow(q, skb, packets);
269 }
270trace:
271 trace_qdisc_dequeue(q, txq, *packets, skb);
272 return skb;
273}
274
/*
 * Transmit possibly several skbs, and handle the return status as
 * required. Owning the running seqcount guarantees that only one CPU
 * can execute this function.
 *
 * Returns to the caller:
 *				false  - hardware queue frozen backoff
 *				true   - feel free to send more pkts
 */
284bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
285 struct net_device *dev, struct netdev_queue *txq,
286 spinlock_t *root_lock, bool validate)
287{
288 int ret = NETDEV_TX_BUSY;
289 bool again = false;
290
	/* And release qdisc */
292 if (root_lock)
293 spin_unlock(root_lock);
294
	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
296 if (validate)
297 skb = validate_xmit_skb_list(skb, dev, &again);
298
299#ifdef CONFIG_XFRM_OFFLOAD
300 if (unlikely(again)) {
301 if (root_lock)
302 spin_lock(root_lock);
303
304 dev_requeue_skb(skb, q);
305 return false;
306 }
307#endif
308
309 if (likely(skb)) {
310 HARD_TX_LOCK(dev, txq, smp_processor_id());
311 if (!netif_xmit_frozen_or_stopped(txq))
312 skb = dev_hard_start_xmit(skb, dev, txq, &ret);
313
314 HARD_TX_UNLOCK(dev, txq);
315 } else {
316 if (root_lock)
317 spin_lock(root_lock);
318 return true;
319 }
320
321 if (root_lock)
322 spin_lock(root_lock);
323
324 if (!dev_xmit_complete(ret)) {
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
326 if (unlikely(ret != NETDEV_TX_BUSY))
327 net_warn_ratelimited("BUG %s code %d qlen %d\n",
328 dev->name, ret, q->q.qlen);
329
330 dev_requeue_skb(skb, q);
331 return false;
332 }
333
334 return true;
335}
336
/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * running seqcount guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
356static inline bool qdisc_restart(struct Qdisc *q, int *packets)
357{
358 spinlock_t *root_lock = NULL;
359 struct netdev_queue *txq;
360 struct net_device *dev;
361 struct sk_buff *skb;
362 bool validate;
363
	/* Dequeue packet */
365 skb = dequeue_skb(q, &validate, packets);
366 if (unlikely(!skb))
367 return false;
368
369 if (!(q->flags & TCQ_F_NOLOCK))
370 root_lock = qdisc_lock(q);
371
372 dev = qdisc_dev(q);
373 txq = skb_get_tx_queue(dev, skb);
374
375 return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
376}
377
378void __qdisc_run(struct Qdisc *q)
379{
380 int quota = dev_tx_weight;
381 int packets;
382
383 while (qdisc_restart(q, &packets)) {
		/*
		 * Ordered by possible occurrence: Postpone processing if
		 * 1. we've exceeded packet quota
		 * 2. another process needs the CPU;
		 */
389 quota -= packets;
390 if (quota <= 0 || need_resched()) {
391 __netif_schedule(q);
392 break;
393 }
394 }
395}
396
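/* Return the most recent transmit start time across all TX queues of a
 * device, resolving VLAN/macvlan upper devices to their real lower device.
 */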
397unsigned long dev_trans_start(struct net_device *dev)
398{
399 unsigned long val, res;
400 unsigned int i;
401
402 if (is_vlan_dev(dev))
403 dev = vlan_dev_real_dev(dev);
404 else if (netif_is_macvlan(dev))
405 dev = macvlan_dev_real_dev(dev);
406 res = netdev_get_tx_queue(dev, 0)->trans_start;
407 for (i = 1; i < dev->num_tx_queues; i++) {
408 val = netdev_get_tx_queue(dev, i)->trans_start;
409 if (val && time_after(val, res))
410 res = val;
411 }
412
413 return res;
414}
415EXPORT_SYMBOL(dev_trans_start);
416
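/* Per-device TX watchdog timer: if any stopped TX queue has not seen a
 * transmission within watchdog_timeo, report a timeout to the driver via
 * ndo_tx_timeout() and rearm the timer.
 */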
417static void dev_watchdog(struct timer_list *t)
418{
419 struct net_device *dev = from_timer(dev, t, watchdog_timer);
420
421 netif_tx_lock(dev);
422 if (!qdisc_tx_is_noop(dev)) {
423 if (netif_device_present(dev) &&
424 netif_running(dev) &&
425 netif_carrier_ok(dev)) {
426 int some_queue_timedout = 0;
427 unsigned int i;
428 unsigned long trans_start;
429
430 for (i = 0; i < dev->num_tx_queues; i++) {
431 struct netdev_queue *txq;
432
433 txq = netdev_get_tx_queue(dev, i);
434 trans_start = txq->trans_start;
435 if (netif_xmit_stopped(txq) &&
436 time_after(jiffies, (trans_start +
437 dev->watchdog_timeo))) {
438 some_queue_timedout = 1;
439 txq->trans_timeout++;
440 break;
441 }
442 }
443
444 if (some_queue_timedout) {
445 trace_net_dev_xmit_timeout(dev, i);
446 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
447 dev->name, netdev_drivername(dev), i);
448 dev->netdev_ops->ndo_tx_timeout(dev);
449 }
450 if (!mod_timer(&dev->watchdog_timer,
451 round_jiffies(jiffies +
452 dev->watchdog_timeo)))
453 dev_hold(dev);
454 }
455 }
456 netif_tx_unlock(dev);
457
458 dev_put(dev);
459}
460
461void __netdev_watchdog_up(struct net_device *dev)
462{
463 if (dev->netdev_ops->ndo_tx_timeout) {
464 if (dev->watchdog_timeo <= 0)
465 dev->watchdog_timeo = 5*HZ;
466 if (!mod_timer(&dev->watchdog_timer,
467 round_jiffies(jiffies + dev->watchdog_timeo)))
468 dev_hold(dev);
469 }
470}
471
472static void dev_watchdog_up(struct net_device *dev)
473{
474 __netdev_watchdog_up(dev);
475}
476
477static void dev_watchdog_down(struct net_device *dev)
478{
479 netif_tx_lock_bh(dev);
480 if (del_timer(&dev->watchdog_timer))
481 dev_put(dev);
482 netif_tx_unlock_bh(dev);
483}
484
/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 *	Device has detected that carrier.
 */
491void netif_carrier_on(struct net_device *dev)
492{
493 if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
494 if (dev->reg_state == NETREG_UNINITIALIZED)
495 return;
496 atomic_inc(&dev->carrier_up_count);
497 linkwatch_fire_event(dev);
498 if (netif_running(dev))
499 __netdev_watchdog_up(dev);
500 }
501}
502EXPORT_SYMBOL(netif_carrier_on);
503
/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 *	Device has detected loss of carrier.
 */
510void netif_carrier_off(struct net_device *dev)
511{
512 if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
513 if (dev->reg_state == NETREG_UNINITIALIZED)
514 return;
515 atomic_inc(&dev->carrier_down_count);
516 linkwatch_fire_event(dev);
517 }
518}
519EXPORT_SYMBOL(netif_carrier_off);
520
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
 * under all circumstances. It is difficult to invent anything faster or
 * cheaper.
 */
526static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
527 struct sk_buff **to_free)
528{
529 __qdisc_drop(skb, to_free);
530 return NET_XMIT_CN;
531}
532
533static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
534{
535 return NULL;
536}
537
538struct Qdisc_ops noop_qdisc_ops __read_mostly = {
539 .id = "noop",
540 .priv_size = 0,
541 .enqueue = noop_enqueue,
542 .dequeue = noop_dequeue,
543 .peek = noop_dequeue,
544 .owner = THIS_MODULE,
545};
546
547static struct netdev_queue noop_netdev_queue = {
548 RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
549 .qdisc_sleeping = &noop_qdisc,
550};
551
552struct Qdisc noop_qdisc = {
553 .enqueue = noop_enqueue,
554 .dequeue = noop_dequeue,
555 .flags = TCQ_F_BUILTIN,
556 .ops = &noop_qdisc_ops,
557 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
558 .dev_queue = &noop_netdev_queue,
559 .running = SEQCNT_ZERO(noop_qdisc.running),
560 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
561 .gso_skb = {
562 .next = (struct sk_buff *)&noop_qdisc.gso_skb,
563 .prev = (struct sk_buff *)&noop_qdisc.gso_skb,
564 .qlen = 0,
565 .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
566 },
567 .skb_bad_txq = {
568 .next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
569 .prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
570 .qlen = 0,
571 .lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
572 },
573};
574EXPORT_SYMBOL(noop_qdisc);
575
576static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
577 struct netlink_ext_ack *extack)
578{
	/* register_qdisc() assigns a default of noop_enqueue if unset,
	 * but __dev_queue_xmit() treats noqueue only as such
	 * if this is NULL - so clear it here.
	 */
582 qdisc->enqueue = NULL;
583 return 0;
584}
585
586struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
587 .id = "noqueue",
588 .priv_size = 0,
589 .init = noqueue_init,
590 .enqueue = noop_enqueue,
591 .dequeue = noop_dequeue,
592 .peek = noop_dequeue,
593 .owner = THIS_MODULE,
594};
595
static const u8 prio2band[TC_PRIO_MAX + 1] = {
	1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};
599
/* 3-band FIFO queue: old style, but should be a bit faster than
 * generic prio+fifo combination.
 */
#define PFIFO_FAST_BANDS 3
605
/*
 * Private data for a pfifo_fast scheduler containing:
 *	- rings for priority bands
 */
610struct pfifo_fast_priv {
611 struct skb_array q[PFIFO_FAST_BANDS];
612};
613
614static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
615 int band)
616{
617 return &priv->q[band];
618}
619
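/* Enqueue onto the lock-free skb_array ring selected by prio2band; if the
 * ring is full the packet is dropped and qdisc drop statistics are updated.
 */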
620static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
621 struct sk_buff **to_free)
622{
623 int band = prio2band[skb->priority & TC_PRIO_MAX];
624 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
625 struct skb_array *q = band2list(priv, band);
626 unsigned int pkt_len = qdisc_pkt_len(skb);
627 int err;
628
629 err = skb_array_produce(q, skb);
630
631 if (unlikely(err)) {
632 if (qdisc_is_percpu_stats(qdisc))
633 return qdisc_drop_cpu(skb, qdisc, to_free);
634 else
635 return qdisc_drop(skb, qdisc, to_free);
636 }
637
638 qdisc_update_stats_at_enqueue(qdisc, pkt_len);
639 return NET_XMIT_SUCCESS;
640}
641
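/* Dequeue from the lowest-numbered non-empty band (band 0 is the highest
 * priority); the qdisc is marked empty when all bands are drained.
 */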
642static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
643{
644 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
645 struct sk_buff *skb = NULL;
646 int band;
647
648 for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
649 struct skb_array *q = band2list(priv, band);
650
651 if (__skb_array_empty(q))
652 continue;
653
654 skb = __skb_array_consume(q);
655 }
656 if (likely(skb)) {
657 qdisc_update_stats_at_dequeue(qdisc, skb);
658 } else {
659 qdisc->empty = true;
660 }
661
662 return skb;
663}
664
665static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
666{
667 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
668 struct sk_buff *skb = NULL;
669 int band;
670
671 for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
672 struct skb_array *q = band2list(priv, band);
673
674 skb = __skb_array_peek(q);
675 }
676
677 return skb;
678}
679
680static void pfifo_fast_reset(struct Qdisc *qdisc)
681{
682 int i, band;
683 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
684
685 for (band = 0; band < PFIFO_FAST_BANDS; band++) {
686 struct skb_array *q = band2list(priv, band);
687 struct sk_buff *skb;
688
		/* NULL ring is possible if destroy path is due to a failed
		 * skb_array_init() in pfifo_fast_init() case.
		 */
692 if (!q->ring.queue)
693 continue;
694
695 while ((skb = __skb_array_consume(q)) != NULL)
696 kfree_skb(skb);
697 }
698
699 if (qdisc_is_percpu_stats(qdisc)) {
700 for_each_possible_cpu(i) {
701 struct gnet_stats_queue *q;
702
703 q = per_cpu_ptr(qdisc->cpu_qstats, i);
704 q->backlog = 0;
705 q->qlen = 0;
706 }
707 }
708}
709
710static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
711{
712 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
713
714 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
715 if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
716 goto nla_put_failure;
717 return skb->len;
718
719nla_put_failure:
720 return -1;
721}
722
723static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
724 struct netlink_ext_ack *extack)
725{
726 unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
727 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
728 int prio;
729
	/* guard against zero length rings */
731 if (!qlen)
732 return -EINVAL;
733
734 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
735 struct skb_array *q = band2list(priv, prio);
736 int err;
737
738 err = skb_array_init(q, qlen, GFP_KERNEL);
739 if (err)
740 return -ENOMEM;
741 }
742
	/* Can by-pass the queue discipline */
744 qdisc->flags |= TCQ_F_CAN_BYPASS;
745 return 0;
746}
747
748static void pfifo_fast_destroy(struct Qdisc *sch)
749{
750 struct pfifo_fast_priv *priv = qdisc_priv(sch);
751 int prio;
752
753 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
754 struct skb_array *q = band2list(priv, prio);
755
		/* NULL ring is possible if destroy path is due to a failed
		 * skb_array_init() in pfifo_fast_init() case.
		 */
759 if (!q->ring.queue)
760 continue;
761
		/* Destroy ring but no need to kfree_skb because a call to
		 * pfifo_fast_reset() has already done that work.
		 */
764 ptr_ring_cleanup(&q->ring, NULL);
765 }
766}
767
768static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
769 unsigned int new_len)
770{
771 struct pfifo_fast_priv *priv = qdisc_priv(sch);
772 struct skb_array *bands[PFIFO_FAST_BANDS];
773 int prio;
774
775 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
776 struct skb_array *q = band2list(priv, prio);
777
778 bands[prio] = q;
779 }
780
781 return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
782 GFP_KERNEL);
783}
784
785struct Qdisc_ops pfifo_fast_ops __read_mostly = {
786 .id = "pfifo_fast",
787 .priv_size = sizeof(struct pfifo_fast_priv),
788 .enqueue = pfifo_fast_enqueue,
789 .dequeue = pfifo_fast_dequeue,
790 .peek = pfifo_fast_peek,
791 .init = pfifo_fast_init,
792 .destroy = pfifo_fast_destroy,
793 .reset = pfifo_fast_reset,
794 .dump = pfifo_fast_dump,
795 .change_tx_queue_len = pfifo_fast_change_tx_queue_len,
796 .owner = THIS_MODULE,
797 .static_flags = TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
798};
799EXPORT_SYMBOL(pfifo_fast_ops);
800
801static struct lock_class_key qdisc_tx_busylock;
802static struct lock_class_key qdisc_running_key;
803
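/* Allocate and minimally initialise a Qdisc instance for @dev_queue on the
 * queue's NUMA node, sized and aligned so that @ops->priv_size bytes of
 * private data follow the struct Qdisc itself.
 */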
804struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
805 const struct Qdisc_ops *ops,
806 struct netlink_ext_ack *extack)
807{
808 void *p;
809 struct Qdisc *sch;
810 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
811 int err = -ENOBUFS;
812 struct net_device *dev;
813
814 if (!dev_queue) {
815 NL_SET_ERR_MSG(extack, "No device queue given");
816 err = -EINVAL;
817 goto errout;
818 }
819
820 dev = dev_queue->dev;
821 p = kzalloc_node(size, GFP_KERNEL,
822 netdev_queue_numa_node_read(dev_queue));
823
824 if (!p)
825 goto errout;
826 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
827
828 if (sch != p) {
829 kfree(p);
830 p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
831 netdev_queue_numa_node_read(dev_queue));
832 if (!p)
833 goto errout;
834 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
835 sch->padded = (char *) sch - (char *) p;
836 }
837 __skb_queue_head_init(&sch->gso_skb);
838 __skb_queue_head_init(&sch->skb_bad_txq);
839 qdisc_skb_head_init(&sch->q);
840 spin_lock_init(&sch->q.lock);
841
842 if (ops->static_flags & TCQ_F_CPUSTATS) {
843 sch->cpu_bstats =
844 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
845 if (!sch->cpu_bstats)
846 goto errout1;
847
848 sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
849 if (!sch->cpu_qstats) {
850 free_percpu(sch->cpu_bstats);
851 goto errout1;
852 }
853 }
854
855 spin_lock_init(&sch->busylock);
856 lockdep_set_class(&sch->busylock,
857 dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
858
	/* seqlock has the same scope of busylock, for NOLOCK qdisc */
	spin_lock_init(&sch->seqlock);
	lockdep_set_class(&sch->seqlock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
863
864 seqcount_init(&sch->running);
865 lockdep_set_class(&sch->running,
866 dev->qdisc_running_key ?: &qdisc_running_key);
867
868 sch->ops = ops;
869 sch->flags = ops->static_flags;
870 sch->enqueue = ops->enqueue;
871 sch->dequeue = ops->dequeue;
872 sch->dev_queue = dev_queue;
873 sch->empty = true;
874 dev_hold(dev);
875 refcount_set(&sch->refcnt, 1);
876
877 return sch;
878errout1:
879 kfree(p);
880errout:
881 return ERR_PTR(err);
882}
883
884struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
885 const struct Qdisc_ops *ops,
886 unsigned int parentid,
887 struct netlink_ext_ack *extack)
888{
889 struct Qdisc *sch;
890
891 if (!try_module_get(ops->owner)) {
892 NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
893 return NULL;
894 }
895
896 sch = qdisc_alloc(dev_queue, ops, extack);
897 if (IS_ERR(sch)) {
898 module_put(ops->owner);
899 return NULL;
900 }
901 sch->parent = parentid;
902
903 if (!ops->init || ops->init(sch, NULL, extack) == 0)
904 return sch;
905
906 qdisc_put(sch);
907 return NULL;
908}
909EXPORT_SYMBOL(qdisc_create_dflt);
910
/* Under qdisc_lock(qdisc) and BH! */
912
913void qdisc_reset(struct Qdisc *qdisc)
914{
915 const struct Qdisc_ops *ops = qdisc->ops;
916 struct sk_buff *skb, *tmp;
917
918 if (ops->reset)
919 ops->reset(qdisc);
920
921 skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
922 __skb_unlink(skb, &qdisc->gso_skb);
923 kfree_skb_list(skb);
924 }
925
926 skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
927 __skb_unlink(skb, &qdisc->skb_bad_txq);
928 kfree_skb_list(skb);
929 }
930
931 qdisc->q.qlen = 0;
932 qdisc->qstats.backlog = 0;
933}
934EXPORT_SYMBOL(qdisc_reset);
935
936void qdisc_free(struct Qdisc *qdisc)
937{
938 if (qdisc_is_percpu_stats(qdisc)) {
939 free_percpu(qdisc->cpu_bstats);
940 free_percpu(qdisc->cpu_qstats);
941 }
942
943 kfree((char *) qdisc - qdisc->padded);
944}
945
946static void qdisc_free_cb(struct rcu_head *head)
947{
948 struct Qdisc *q = container_of(head, struct Qdisc, rcu);
949
950 qdisc_free(q);
951}
952
953static void qdisc_destroy(struct Qdisc *qdisc)
954{
955 const struct Qdisc_ops *ops = qdisc->ops;
956 struct sk_buff *skb, *tmp;
957
958#ifdef CONFIG_NET_SCHED
959 qdisc_hash_del(qdisc);
960
961 qdisc_put_stab(rtnl_dereference(qdisc->stab));
962#endif
963 gen_kill_estimator(&qdisc->rate_est);
964 if (ops->reset)
965 ops->reset(qdisc);
966 if (ops->destroy)
967 ops->destroy(qdisc);
968
969 module_put(ops->owner);
970 dev_put(qdisc_dev(qdisc));
971
972 skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
973 __skb_unlink(skb, &qdisc->gso_skb);
974 kfree_skb_list(skb);
975 }
976
977 skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
978 __skb_unlink(skb, &qdisc->skb_bad_txq);
979 kfree_skb_list(skb);
980 }
981
982 call_rcu(&qdisc->rcu, qdisc_free_cb);
983}
984
985void qdisc_put(struct Qdisc *qdisc)
986{
987 if (qdisc->flags & TCQ_F_BUILTIN ||
988 !refcount_dec_and_test(&qdisc->refcnt))
989 return;
990
991 qdisc_destroy(qdisc);
992}
993EXPORT_SYMBOL(qdisc_put);
994
/* Version of qdisc_put() that is called with rtnl mutex unlocked.
 * Intended to be used as optimization, this function only takes rtnl lock if
 * qdisc reference counter reached zero.
 */
1000void qdisc_put_unlocked(struct Qdisc *qdisc)
1001{
1002 if (qdisc->flags & TCQ_F_BUILTIN ||
1003 !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
1004 return;
1005
1006 qdisc_destroy(qdisc);
1007 rtnl_unlock();
1008}
1009EXPORT_SYMBOL(qdisc_put_unlocked);
1010
/* Attach toplevel qdisc to device queue. */
1012struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
1013 struct Qdisc *qdisc)
1014{
1015 struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
1016 spinlock_t *root_lock;
1017
1018 root_lock = qdisc_lock(oqdisc);
1019 spin_lock_bh(root_lock);
1020
	/* Graft the new qdisc (noop if none was given) */
1022 if (qdisc == NULL)
1023 qdisc = &noop_qdisc;
1024 dev_queue->qdisc_sleeping = qdisc;
1025 rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
1026
1027 spin_unlock_bh(root_lock);
1028
1029 return oqdisc;
1030}
1031EXPORT_SYMBOL(dev_graft_qdisc);
1032
1033static void attach_one_default_qdisc(struct net_device *dev,
1034 struct netdev_queue *dev_queue,
1035 void *_unused)
1036{
1037 struct Qdisc *qdisc;
1038 const struct Qdisc_ops *ops = default_qdisc_ops;
1039
1040 if (dev->priv_flags & IFF_NO_QUEUE)
1041 ops = &noqueue_qdisc_ops;
1042
1043 qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
1044 if (!qdisc) {
1045 netdev_info(dev, "activation failed\n");
1046 return;
1047 }
1048 if (!netif_is_multiqueue(dev))
1049 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1050 dev_queue->qdisc_sleeping = qdisc;
1051}
1052
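/* Attach default qdiscs at activation time: real multiqueue devices get an
 * mq root qdisc, while single-queue or IFF_NO_QUEUE devices use the
 * per-queue default (pfifo_fast or noqueue) directly as the root qdisc.
 */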
1053static void attach_default_qdiscs(struct net_device *dev)
1054{
1055 struct netdev_queue *txq;
1056 struct Qdisc *qdisc;
1057
1058 txq = netdev_get_tx_queue(dev, 0);
1059
1060 if (!netif_is_multiqueue(dev) ||
1061 dev->priv_flags & IFF_NO_QUEUE) {
1062 netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
1063 dev->qdisc = txq->qdisc_sleeping;
1064 qdisc_refcount_inc(dev->qdisc);
1065 } else {
1066 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
1067 if (qdisc) {
1068 dev->qdisc = qdisc;
1069 qdisc->ops->attach(qdisc);
1070 }
1071 }
1072#ifdef CONFIG_NET_SCHED
1073 if (dev->qdisc != &noop_qdisc)
1074 qdisc_hash_add(dev->qdisc, false);
1075#endif
1076}
1077
1078static void transition_one_qdisc(struct net_device *dev,
1079 struct netdev_queue *dev_queue,
1080 void *_need_watchdog)
1081{
1082 struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
1083 int *need_watchdog_p = _need_watchdog;
1084
1085 if (!(new_qdisc->flags & TCQ_F_BUILTIN))
1086 clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
1087
1088 rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
1089 if (need_watchdog_p) {
1090 dev_queue->trans_start = 0;
1091 *need_watchdog_p = 1;
1092 }
1093}
1094
1095void dev_activate(struct net_device *dev)
1096{
1097 int need_watchdog;
1098
	/* No queueing discipline is attached to device;
	 * create default one for devices, which need queueing
	 * and noqueue_qdisc for virtual interfaces
	 */
1104 if (dev->qdisc == &noop_qdisc)
1105 attach_default_qdiscs(dev);
1106
1107 if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
1109 return;
1110
1111 need_watchdog = 0;
1112 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
1113 if (dev_ingress_queue(dev))
1114 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
1115
1116 if (need_watchdog) {
1117 netif_trans_update(dev);
1118 dev_watchdog_up(dev);
1119 }
1120}
1121EXPORT_SYMBOL(dev_activate);
1122
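/* Swap in @_qdisc_default (noop_qdisc) for a queue being deactivated and
 * reset the old qdisc under its locks so no new packets are queued to it.
 */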
1123static void dev_deactivate_queue(struct net_device *dev,
1124 struct netdev_queue *dev_queue,
1125 void *_qdisc_default)
1126{
1127 struct Qdisc *qdisc_default = _qdisc_default;
1128 struct Qdisc *qdisc;
1129
1130 qdisc = rtnl_dereference(dev_queue->qdisc);
1131 if (qdisc) {
1132 bool nolock = qdisc->flags & TCQ_F_NOLOCK;
1133
1134 if (nolock)
1135 spin_lock_bh(&qdisc->seqlock);
1136 spin_lock_bh(qdisc_lock(qdisc));
1137
1138 if (!(qdisc->flags & TCQ_F_BUILTIN))
1139 set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
1140
1141 rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1142 qdisc_reset(qdisc);
1143
1144 spin_unlock_bh(qdisc_lock(qdisc));
1145 if (nolock)
1146 spin_unlock_bh(&qdisc->seqlock);
1147 }
1148}
1149
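/* Return true if any TX qdisc of the device is still running or scheduled
 * for transmission; used by dev_deactivate_many() to wait for quiescence.
 */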
1150static bool some_qdisc_is_busy(struct net_device *dev)
1151{
1152 unsigned int i;
1153
1154 for (i = 0; i < dev->num_tx_queues; i++) {
1155 struct netdev_queue *dev_queue;
1156 spinlock_t *root_lock;
1157 struct Qdisc *q;
1158 int val;
1159
1160 dev_queue = netdev_get_tx_queue(dev, i);
1161 q = dev_queue->qdisc_sleeping;
1162
1163 root_lock = qdisc_lock(q);
1164 spin_lock_bh(root_lock);
1165
1166 val = (qdisc_is_running(q) ||
1167 test_bit(__QDISC_STATE_SCHED, &q->state));
1168
1169 spin_unlock_bh(root_lock);
1170
1171 if (val)
1172 return true;
1173 }
1174 return false;
1175}
1176
1177static void dev_qdisc_reset(struct net_device *dev,
1178 struct netdev_queue *dev_queue,
1179 void *none)
1180{
1181 struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1182
1183 if (qdisc)
1184 qdisc_reset(qdisc);
1185}
1186
/**
 *	dev_deactivate_many - deactivate transmissions on several devices
 *	@head: list of devices to deactivate
 *
 *	This function returns only when all outstanding qdiscs have
 *	quiesced and devices are off-queue.
 */
1194void dev_deactivate_many(struct list_head *head)
1195{
1196 struct net_device *dev;
1197
1198 list_for_each_entry(dev, head, close_list) {
1199 netdev_for_each_tx_queue(dev, dev_deactivate_queue,
1200 &noop_qdisc);
1201 if (dev_ingress_queue(dev))
1202 dev_deactivate_queue(dev, dev_ingress_queue(dev),
1203 &noop_qdisc);
1204
1205 dev_watchdog_down(dev);
1206 }
1207
	/* Wait for outstanding qdisc-less dev_queue_xmit calls.
	 * This is avoided if all devices are in dismantle phase :
	 * Caller will call synchronize_net() for us
	 */
1212 synchronize_net();
1213
	/* Wait for outstanding qdisc_run calls. */
1215 list_for_each_entry(dev, head, close_list) {
1216 while (some_qdisc_is_busy(dev))
1217 yield();
1218
		/* The new qdisc is assigned at this point so we can safely
		 * unwind stale skb lists and qdisc statistics
		 */
1221 netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
1222 if (dev_ingress_queue(dev))
1223 dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
1224 }
1225}
1226
1227void dev_deactivate(struct net_device *dev)
1228{
1229 LIST_HEAD(single);
1230
1231 list_add(&dev->close_list, &single);
1232 dev_deactivate_many(&single);
1233 list_del(&single);
1234}
1235EXPORT_SYMBOL(dev_deactivate);
1236
1237static int qdisc_change_tx_queue_len(struct net_device *dev,
1238 struct netdev_queue *dev_queue)
1239{
1240 struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1241 const struct Qdisc_ops *ops = qdisc->ops;
1242
1243 if (ops->change_tx_queue_len)
1244 return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
1245 return 0;
1246}
1247
1248int dev_qdisc_change_tx_queue_len(struct net_device *dev)
1249{
1250 bool up = dev->flags & IFF_UP;
1251 unsigned int i;
1252 int ret = 0;
1253
1254 if (up)
1255 dev_deactivate(dev);
1256
1257 for (i = 0; i < dev->num_tx_queues; i++) {
1258 ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);
1259
		/* TODO: revert changes on a partial failure */
1261 if (ret)
1262 break;
1263 }
1264
1265 if (up)
1266 dev_activate(dev);
1267 return ret;
1268}
1269
1270static void dev_init_scheduler_queue(struct net_device *dev,
1271 struct netdev_queue *dev_queue,
1272 void *_qdisc)
1273{
1274 struct Qdisc *qdisc = _qdisc;
1275
1276 rcu_assign_pointer(dev_queue->qdisc, qdisc);
1277 dev_queue->qdisc_sleeping = qdisc;
1278}
1279
1280void dev_init_scheduler(struct net_device *dev)
1281{
1282 dev->qdisc = &noop_qdisc;
1283 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
1284 if (dev_ingress_queue(dev))
1285 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1286
1287 timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
1288}
1289
1290static void shutdown_scheduler_queue(struct net_device *dev,
1291 struct netdev_queue *dev_queue,
1292 void *_qdisc_default)
1293{
1294 struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1295 struct Qdisc *qdisc_default = _qdisc_default;
1296
1297 if (qdisc) {
1298 rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1299 dev_queue->qdisc_sleeping = qdisc_default;
1300
1301 qdisc_put(qdisc);
1302 }
1303}
1304
1305void dev_shutdown(struct net_device *dev)
1306{
1307 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
1308 if (dev_ingress_queue(dev))
1309 shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1310 qdisc_put(dev->qdisc);
1311 dev->qdisc = &noop_qdisc;
1312
1313 WARN_ON(timer_pending(&dev->watchdog_timer));
1314}
1315
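/* Precompute mult/shift so that the byte-to-nanosecond conversion in the
 * fast path (see e.g. psched_l2t_ns()) needs only a multiply and a shift
 * instead of a 64-bit divide.
 */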
1316void psched_ratecfg_precompute(struct psched_ratecfg *r,
1317 const struct tc_ratespec *conf,
1318 u64 rate64)
1319{
1320 memset(r, 0, sizeof(*r));
1321 r->overhead = conf->overhead;
1322 r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
1323 r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
1324 r->mult = 1;
	/* The deal here is to replace a divide by a reciprocal one
	 * in fast path (a reciprocal divide is a multiply and a shift)
	 *
	 * Normal formula would be :
	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
	 *
	 * We compute mult/shift to use instead :
	 *  time_in_ns = (len * mult) >> shift;
	 *
	 * We try to get the highest possible mult value for accuracy,
	 * but have to make sure no overflows will ever happen.
	 */
1338 if (r->rate_bytes_ps > 0) {
1339 u64 factor = NSEC_PER_SEC;
1340
1341 for (;;) {
1342 r->mult = div64_u64(factor, r->rate_bytes_ps);
1343 if (r->mult & (1U << 31) || factor & (1ULL << 63))
1344 break;
1345 factor <<= 1;
1346 r->shift++;
1347 }
1348 }
1349}
1350EXPORT_SYMBOL(psched_ratecfg_precompute);
1351
1352static void mini_qdisc_rcu_func(struct rcu_head *head)
1353{
1354}
1355
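/* Publish a new filter list for the miniq pair: flip between the two
 * preallocated mini_Qdisc structs and use RCU barriers so readers never
 * observe a half-updated entry.
 */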
1356void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1357 struct tcf_proto *tp_head)
1358{
	/* The caller serializes updates of *p_miniq, hence the "1" passed as
	 * the lockdep condition to rcu_dereference_protected().
	 */
1362 struct mini_Qdisc *miniq_old =
1363 rcu_dereference_protected(*miniqp->p_miniq, 1);
1364 struct mini_Qdisc *miniq;
1365
1366 if (!tp_head) {
1367 RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
		/* Wait for flying RCU callback before it is freed. */
1369 rcu_barrier();
1370 return;
1371 }
1372
1373 miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
1374 &miniqp->miniq1 : &miniqp->miniq2;
1375
	/* We need to make sure that readers won't see the miniq
	 * we are about to modify. So wait until previous call_rcu callback
	 * is done.
	 */
1380 rcu_barrier();
1381 miniq->filter_list = tp_head;
1382 rcu_assign_pointer(*miniqp->p_miniq, miniq);
1383
1384 if (miniq_old)
		/* This is counterpart of the rcu barriers above. We need to
		 * block potential new user of miniq_old until all readers
		 * are not seeing it.
		 */
1389 call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
1390}
1391EXPORT_SYMBOL(mini_qdisc_pair_swap);
1392
1393void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
1394 struct mini_Qdisc __rcu **p_miniq)
1395{
1396 miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
1397 miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
1398 miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
1399 miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
1400 miniqp->p_miniq = p_miniq;
1401}
1402EXPORT_SYMBOL(mini_qdisc_pair_init);
1403