// SPDX-License-Identifier: GPL-2.0-or-later
/* drivers/net/ifb.c:

	The purpose of this driver is to provide a device that allows
	for sharing of resources:

	1) qdiscs/policies that are per device as opposed to system wide.
	ifb allows for a device which can be redirected to thus providing
	an impedance matcher kind of setup as this allows for extremely
	transparent sharing.

	2) Allows for queueing incoming traffic for shaping instead of
	dropping.

	The original concept is based on what is known as the IMQ
	driver initially written by Martin Devera, later rewritten
	by mwang et al and subsequently taken over by Patrick McHardy.

	Authors:	Jamal Hadi Salim (2005)

*/

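/* A typical setup redirects ingress traffic from a physical interface into
 * an ifb device so it can be shaped by ordinary egress qdiscs; the device
 * names below are only examples:
 *
 *	ip link add ifb0 type ifb
 *	ip link set ifb0 up
 *	tc qdisc add dev eth0 handle ffff: ingress
 *	tc filter add dev eth0 parent ffff: matchall \
 *		action mirred egress redirect dev ifb0
 */
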
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_netdev.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>

#define TX_Q_LIMIT 32

struct ifb_q_stats {
	u64 packets;
	u64 bytes;
	struct u64_stats_sync sync;
};

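/* One ifb_q_private per device TX queue. Packets redirected to the ifb
 * device are queued on @rq by ifb_xmit(); the tasklet splices @rq into its
 * working queue @tq and re-injects each skb into the stack from there.
 */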
struct ifb_q_private {
	struct net_device	*dev;
	struct tasklet_struct	ifb_tasklet;
	int			tasklet_pending;
	int			txqnum;
	struct sk_buff_head	rq;
	struct sk_buff_head	tq;
	struct ifb_q_stats	rx_stats;
	struct ifb_q_stats	tx_stats;
} ____cacheline_aligned_in_smp;

struct ifb_dev_private {
	struct ifb_q_private *tx_private;
};

/* For ethtool stats. */
struct ifb_q_stats_desc {
	char	desc[ETH_GSTRING_LEN];
	size_t	offset;
};

#define IFB_Q_STAT(m) offsetof(struct ifb_q_stats, m)

static const struct ifb_q_stats_desc ifb_q_stats_desc[] = {
	{ "packets",	IFB_Q_STAT(packets) },
	{ "bytes",	IFB_Q_STAT(bytes) },
};

#define IFB_Q_STATS_LEN	ARRAY_SIZE(ifb_q_stats_desc)

static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
static int ifb_open(struct net_device *dev);
static int ifb_close(struct net_device *dev);

static void ifb_update_q_stats(struct ifb_q_stats *stats, int len)
{
	u64_stats_update_begin(&stats->sync);
	stats->packets++;
	stats->bytes += len;
	u64_stats_update_end(&stats->sync);
}

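/* Per-queue tasklet. Drains the working queue @tq (refilling it from @rq
 * under the TX queue lock when empty), clears the redirect/classify marks
 * so the packet cannot loop back into ifb, points each skb back at its
 * original device (skb_iif) and re-injects it: dev_queue_xmit() for packets
 * grabbed at egress, netif_receive_skb() for packets grabbed at ingress.
 */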
static void ifb_ri_tasklet(struct tasklet_struct *t)
{
	struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet);
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		if (!__netif_tx_trylock(txq))
			goto resched;
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
	}

	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
		/* Skip tc and netfilter to prevent redirection loop. */
		skb->redirected = 0;
#ifdef CONFIG_NET_CLS_ACT
		skb->tc_skip_classify = 1;
#endif
		nf_skip_egress(skb, true);

		ifb_update_q_stats(&txp->tx_stats, skb->len);

		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = txp->dev->ifindex;

		if (!skb->from_ingress) {
			dev_queue_xmit(skb);
		} else {
			skb_pull_rcsum(skb, skb->mac_len);
			netif_receive_skb(skb);
		}
	}

	if (__netif_tx_trylock(txq)) {
		skb = skb_peek(&txp->rq);
		if (!skb) {
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
		} else {
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}
}

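/* Sum the per-queue counters into the device-wide rtnl_link_stats64, using
 * the u64_stats retry loop to get a consistent 64-bit snapshot per queue.
 */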
static void ifb_stats64(struct net_device *dev,
			struct rtnl_link_stats64 *stats)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private;
	unsigned int start;
	u64 packets, bytes;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		do {
			start = u64_stats_fetch_begin_irq(&txp->rx_stats.sync);
			packets = txp->rx_stats.packets;
			bytes = txp->rx_stats.bytes;
		} while (u64_stats_fetch_retry_irq(&txp->rx_stats.sync, start));
		stats->rx_packets += packets;
		stats->rx_bytes += bytes;

		do {
			start = u64_stats_fetch_begin_irq(&txp->tx_stats.sync);
			packets = txp->tx_stats.packets;
			bytes = txp->tx_stats.bytes;
		} while (u64_stats_fetch_retry_irq(&txp->tx_stats.sync, start));
		stats->tx_packets += packets;
		stats->tx_bytes += bytes;
	}
	stats->rx_dropped = dev->stats.rx_dropped;
	stats->tx_dropped = dev->stats.tx_dropped;
}

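/* Allocate one ifb_q_private per TX queue, initialize its queues and
 * stat syncs, and arm its tasklet.
 */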
static int ifb_dev_init(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp;
	int i;

	txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
	if (!txp)
		return -ENOMEM;
	dp->tx_private = txp;
	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		txp->txqnum = i;
		txp->dev = dev;
		__skb_queue_head_init(&txp->rq);
		__skb_queue_head_init(&txp->tq);
		u64_stats_init(&txp->rx_stats.sync);
		u64_stats_init(&txp->tx_stats.sync);
		tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet);
		netif_tx_start_queue(netdev_get_tx_queue(dev, i));
	}
	return 0;
}

static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	u8 *p = buf;
	int i, j;

	switch (stringset) {
	case ETH_SS_STATS:
		for (i = 0; i < dev->real_num_rx_queues; i++)
			for (j = 0; j < IFB_Q_STATS_LEN; j++)
				ethtool_sprintf(&p, "rx_queue_%u_%.18s",
						i, ifb_q_stats_desc[j].desc);

		for (i = 0; i < dev->real_num_tx_queues; i++)
			for (j = 0; j < IFB_Q_STATS_LEN; j++)
				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
						i, ifb_q_stats_desc[j].desc);

		break;
	}
}

static int ifb_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return IFB_Q_STATS_LEN * (dev->real_num_rx_queues +
					  dev->real_num_tx_queues);
	default:
		return -EOPNOTSUPP;
	}
}

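/* Copy one queue's counters into the ethtool data array, retrying until the
 * u64_stats sequence is stable, then advance the caller's cursor.
 */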
static void ifb_fill_stats_data(u64 **data,
				struct ifb_q_stats *q_stats)
{
	void *stats_base = (void *)q_stats;
	unsigned int start;
	size_t offset;
	int j;

	do {
		start = u64_stats_fetch_begin_irq(&q_stats->sync);
		for (j = 0; j < IFB_Q_STATS_LEN; j++) {
			offset = ifb_q_stats_desc[j].offset;
			(*data)[j] = *(u64 *)(stats_base + offset);
		}
	} while (u64_stats_fetch_retry_irq(&q_stats->sync, start));

	*data += IFB_Q_STATS_LEN;
}

static void ifb_get_ethtool_stats(struct net_device *dev,
				  struct ethtool_stats *stats, u64 *data)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp;
	int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		txp = dp->tx_private + i;
		ifb_fill_stats_data(&data, &txp->rx_stats);
	}

	for (i = 0; i < dev->real_num_tx_queues; i++) {
		txp = dp->tx_private + i;
		ifb_fill_stats_data(&data, &txp->tx_stats);
	}
}

static const struct net_device_ops ifb_netdev_ops = {
	.ndo_open		= ifb_open,
	.ndo_stop		= ifb_close,
	.ndo_get_stats64	= ifb_stats64,
	.ndo_start_xmit		= ifb_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_init		= ifb_dev_init,
};

static const struct ethtool_ops ifb_ethtool_ops = {
	.get_strings		= ifb_get_strings,
	.get_sset_count		= ifb_get_sset_count,
	.get_ethtool_stats	= ifb_get_ethtool_stats,
};

#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \
		      NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
		      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \
		      NETIF_F_HW_VLAN_STAG_TX)

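/* Tear down each queue: kill the tasklet and purge any packets still sitting
 * in the internal queues before the per-queue array is freed.
 */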
static void ifb_dev_free(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++, txp++) {
		tasklet_kill(&txp->ifb_tasklet);
		__skb_queue_purge(&txp->rq);
		__skb_queue_purge(&txp->tq);
	}
	kfree(dp->tx_private);
}

static void ifb_setup(struct net_device *dev)
{
	/* Initialize the device structure. */
	dev->netdev_ops = &ifb_netdev_ops;
	dev->ethtool_ops = &ifb_ethtool_ops;

	/* Fill in device structure with ethernet-generic values. */
	ether_setup(dev);
	dev->tx_queue_len = TX_Q_LIMIT;

	dev->features |= IFB_FEATURES;
	dev->hw_features |= dev->features;
	dev->hw_enc_features |= dev->features;
	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);

	dev->flags |= IFF_NOARP;
	dev->flags &= ~IFF_MULTICAST;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	netif_keep_dst(dev);
	eth_hw_addr_random(dev);
	dev->needs_free_netdev = true;
	dev->priv_destructor = ifb_dev_free;

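	/* ifb never puts packets on a wire, so don't constrain the MTU:
	 * min_mtu of 0 allows any value and max_mtu of 0 disables the
	 * upper-bound check entirely.
	 */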
	dev->min_mtu = 0;
	dev->max_mtu = 0;
}

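/* Entry point for packets redirected to the ifb device (typically via the
 * mirred action). Anything that did not arrive via a redirect is dropped.
 * The skb is queued on the per-queue rq and the tasklet is kicked; the TX
 * queue is stopped once TX_Q_LIMIT packets are pending.
 */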
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);

	ifb_update_q_stats(&txp->rx_stats, skb->len);

	if (!skb->redirected || !skb->skb_iif) {
		dev_kfree_skb(skb);
		dev->stats.rx_dropped++;
		return NETDEV_TX_OK;
	}

	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));

	__skb_queue_tail(&txp->rq, skb);
	if (!txp->tasklet_pending) {
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}

	return NETDEV_TX_OK;
}

static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	return 0;
}

static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);
	return 0;
}

static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
			struct netlink_ext_ack *extack)
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}
	return 0;
}

static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_dev_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};

/* Number of ifb devices to be set up by this module.
 * Note that these legacy devices have one queue.
 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
 */
static int numifbs = 2;
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");

static int __init ifb_init_one(int index)
{
	struct net_device *dev_ifb;
	int err;

	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
			       NET_NAME_UNKNOWN, ifb_setup);

	if (!dev_ifb)
		return -ENOMEM;

	dev_ifb->rtnl_link_ops = &ifb_link_ops;
	err = register_netdevice(dev_ifb);
	if (err < 0)
		goto err;

	return 0;

err:
	free_netdev(dev_ifb);
	return err;
}

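/* Take pernet_ops_rwsem and RTNL once, as rtnl_link_register() would, then
 * register the link ops and create the initial ifb devices under the same
 * critical section.
 */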
static int __init ifb_init_module(void)
{
	int i, err;

	down_write(&pernet_ops_rwsem);
	rtnl_lock();
	err = __rtnl_link_register(&ifb_link_ops);
	if (err < 0)
		goto out;

	for (i = 0; i < numifbs && !err; i++) {
		err = ifb_init_one(i);
		cond_resched();
	}
	if (err)
		__rtnl_link_unregister(&ifb_link_ops);

out:
	rtnl_unlock();
	up_write(&pernet_ops_rwsem);

	return err;
}

static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}

module_init(ifb_init_module);
module_exit(ifb_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");