1
2#ifndef __NET_VXLAN_H
3#define __NET_VXLAN_H 1
4
5#include <linux/if_vlan.h>
6#include <net/udp_tunnel.h>
7#include <net/dst_metadata.h>
8#include <net/rtnetlink.h>
9#include <net/switchdev.h>
10
/* UDP port number 4789; the name marks it as the IANA assignment for VXLAN. */
#define IANA_VXLAN_UDP_PORT	4789
12
13
14
15
16
17
18
19
20
21
22struct vxlanhdr {
23 __be32 vx_flags;
24 __be32 vx_vni;
25};
26
27
/* Flag mask: bit 27 of the first header word, kept in big-endian form. */
#define VXLAN_HF_VNI		cpu_to_be32(BIT(27))

/* 24-bit identifier space: number of values and the matching mask. */
#define VXLAN_N_VID		(1u << 24)
#define VXLAN_VID_MASK		(VXLAN_N_VID - 1)
/* The 24-bit mask moved up by 8 bits, as a big-endian word. */
#define VXLAN_VNI_MASK		cpu_to_be32(VXLAN_VID_MASK << 8)
/* Size of a UDP header followed by the base VXLAN header. */
#define VXLAN_HLEN		(sizeof(struct udphdr) + sizeof(struct vxlanhdr))

/* Bucket counts: VNI_HASH_SIZE sizes vxlan_sock.vni_list, FDB_HASH_SIZE
 * sizes vxlan_dev.fdb_head.
 */
#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1 << VNI_HASH_BITS)
#define FDB_HASH_BITS		8
#define FDB_HASH_SIZE		(1 << FDB_HASH_BITS)
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/* Flag mask: bit 21 of the first header word (RCO). */
#define VXLAN_HF_RCO		cpu_to_be32(BIT(21))

/*
 * RCO encoding inside the low byte of the second header word:
 *   bits 0-6 (VXLAN_RCO_MASK) hold the checksum start divided by
 *   2^VXLAN_RCO_SHIFT, see vxlan_rco_start();
 *   bit 7 (VXLAN_RCO_UDP) selects the UDP checksum offset, otherwise the
 *   TCP one is used, see vxlan_rco_offset().
 */
#define VXLAN_RCO_MASK		cpu_to_be32(0x7f)
#define VXLAN_RCO_UDP		cpu_to_be32(0x80)
#define VXLAN_RCO_SHIFT		1
/* Low bits of a start value that the shifted encoding cannot carry. */
#define VXLAN_RCO_SHIFT_MASK	((1 << VXLAN_RCO_SHIFT) - 1)
/* Largest start value the 7-bit field can express. */
#define VXLAN_MAX_REMCSUM_START	(0x7f << VXLAN_RCO_SHIFT)
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87struct vxlanhdr_gbp {
88 u8 vx_flags;
89#ifdef __LITTLE_ENDIAN_BITFIELD
90 u8 reserved_flags1:3,
91 policy_applied:1,
92 reserved_flags2:2,
93 dont_learn:1,
94 reserved_flags3:1;
95#elif defined(__BIG_ENDIAN_BITFIELD)
96 u8 reserved_flags1:1,
97 dont_learn:1,
98 reserved_flags2:2,
99 policy_applied:1,
100 reserved_flags3:3;
101#else
102#error "Please fix <asm/byteorder.h>"
103#endif
104 __be16 policy_id;
105 __be32 vx_vni;
106};
107
108
/* Flag mask: bit 31 of the first header word (GBP). */
#define VXLAN_HF_GBP		cpu_to_be32(BIT(31))

/* GBP flag plus the low 24 bits of the first header word. */
#define VXLAN_GBP_USED_BITS	(VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF))

/*
 * Host-order layout of a 32-bit GBP value (presumably the one carried in
 * struct vxlan_metadata.gbp -- confirm against the users):
 *   bit 22     VXLAN_GBP_DONT_LEARN
 *   bit 19     VXLAN_GBP_POLICY_APPLIED
 *   bits 0-15  policy id (VXLAN_GBP_ID_MASK)
 */
#define VXLAN_GBP_DONT_LEARN		(BIT(6) << 16)
#define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)
#define VXLAN_GBP_ID_MASK		(0xFFFF)
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145struct vxlanhdr_gpe {
146#if defined(__LITTLE_ENDIAN_BITFIELD)
147 u8 oam_flag:1,
148 reserved_flags1:1,
149 np_applied:1,
150 instance_applied:1,
151 version:2,
152 reserved_flags2:2;
153#elif defined(__BIG_ENDIAN_BITFIELD)
154 u8 reserved_flags2:2,
155 version:2,
156 instance_applied:1,
157 np_applied:1,
158 reserved_flags1:1,
159 oam_flag:1;
160#endif
161 u8 reserved_flags3;
162 u8 reserved_flags4;
163 u8 next_protocol;
164 __be32 vx_vni;
165};
166
167
/* GPE flag masks within the first header word (big-endian form). */
#define VXLAN_HF_VER	cpu_to_be32(BIT(29) | BIT(28))	/* 2-bit version */
#define VXLAN_HF_NP	cpu_to_be32(BIT(26))
#define VXLAN_HF_OAM	cpu_to_be32(BIT(24))

/* All GPE flag bits plus the low byte of the first header word. */
#define VXLAN_GPE_USED_BITS					\
	(VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | cpu_to_be32(0xff))
174
175struct vxlan_metadata {
176 u32 gbp;
177};
178
179
180struct vxlan_sock {
181 struct hlist_node hlist;
182 struct socket *sock;
183 struct hlist_head vni_list[VNI_HASH_SIZE];
184 refcount_t refcnt;
185 u32 flags;
186};
187
188union vxlan_addr {
189 struct sockaddr_in sin;
190 struct sockaddr_in6 sin6;
191 struct sockaddr sa;
192};
193
194struct vxlan_rdst {
195 union vxlan_addr remote_ip;
196 __be16 remote_port;
197 u8 offloaded:1;
198 __be32 remote_vni;
199 u32 remote_ifindex;
200 struct list_head list;
201 struct rcu_head rcu;
202 struct dst_cache dst_cache;
203};
204
205struct vxlan_config {
206 union vxlan_addr remote_ip;
207 union vxlan_addr saddr;
208 __be32 vni;
209 int remote_ifindex;
210 int mtu;
211 __be16 dst_port;
212 u16 port_min;
213 u16 port_max;
214 u8 tos;
215 u8 ttl;
216 __be32 label;
217 u32 flags;
218 unsigned long age_interval;
219 unsigned int addrmax;
220 bool no_share;
221 enum ifla_vxlan_df df;
222};
223
224struct vxlan_dev_node {
225 struct hlist_node hlist;
226 struct vxlan_dev *vxlan;
227};
228
229
230struct vxlan_dev {
231 struct vxlan_dev_node hlist4;
232#if IS_ENABLED(CONFIG_IPV6)
233 struct vxlan_dev_node hlist6;
234#endif
235 struct list_head next;
236 struct vxlan_sock __rcu *vn4_sock;
237#if IS_ENABLED(CONFIG_IPV6)
238 struct vxlan_sock __rcu *vn6_sock;
239#endif
240 struct net_device *dev;
241 struct net *net;
242 struct vxlan_rdst default_dst;
243
244 struct timer_list age_timer;
245 spinlock_t hash_lock;
246 unsigned int addrcnt;
247 struct gro_cells gro_cells;
248
249 struct vxlan_config cfg;
250
251 struct hlist_head fdb_head[FDB_HASH_SIZE];
252};
253
/* Device flag bits, one bit per option. */
#define VXLAN_F_LEARN			(1 << 0)
#define VXLAN_F_PROXY			(1 << 1)
#define VXLAN_F_RSC			(1 << 2)
#define VXLAN_F_L2MISS			(1 << 3)
#define VXLAN_F_L3MISS			(1 << 4)
#define VXLAN_F_IPV6			(1 << 5)
#define VXLAN_F_UDP_ZERO_CSUM_TX	(1 << 6)
#define VXLAN_F_UDP_ZERO_CSUM6_TX	(1 << 7)
#define VXLAN_F_UDP_ZERO_CSUM6_RX	(1 << 8)
#define VXLAN_F_REMCSUM_TX		(1 << 9)
#define VXLAN_F_REMCSUM_RX		(1 << 10)
#define VXLAN_F_GBP			(1 << 11)
#define VXLAN_F_REMCSUM_NOPARTIAL	(1 << 12)
#define VXLAN_F_COLLECT_METADATA	(1 << 13)
#define VXLAN_F_GPE			(1 << 14)
#define VXLAN_F_IPV6_LINKLOCAL		(1 << 15)
#define VXLAN_F_TTL_INHERIT		(1 << 16)

/* Subset named for the receive side (exact use: confirm in the driver). */
#define VXLAN_F_RCV_FLAGS		\
	(VXLAN_F_GBP |			\
	 VXLAN_F_GPE |			\
	 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
	 VXLAN_F_REMCSUM_RX |		\
	 VXLAN_F_REMCSUM_NOPARTIAL |	\
	 VXLAN_F_COLLECT_METADATA)

/* VXLAN_F_GPE together with the other flags grouped with it by name. */
#define VXLAN_F_ALLOWED_GPE		\
	(VXLAN_F_GPE |			\
	 VXLAN_F_IPV6 |			\
	 VXLAN_F_IPV6_LINKLOCAL |	\
	 VXLAN_F_UDP_ZERO_CSUM_TX |	\
	 VXLAN_F_UDP_ZERO_CSUM6_TX |	\
	 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
	 VXLAN_F_COLLECT_METADATA)
290
291struct net_device *vxlan_dev_create(struct net *net, const char *name,
292 u8 name_assign_type, struct vxlan_config *conf);
293
294static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
295 netdev_features_t features)
296{
297 u8 l4_hdr = 0;
298
299 if (!skb->encapsulation)
300 return features;
301
302 switch (vlan_get_protocol(skb)) {
303 case htons(ETH_P_IP):
304 l4_hdr = ip_hdr(skb)->protocol;
305 break;
306 case htons(ETH_P_IPV6):
307 l4_hdr = ipv6_hdr(skb)->nexthdr;
308 break;
309 default:
310 return features;
311 }
312
313 if ((l4_hdr == IPPROTO_UDP) &&
314 (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
315 skb->inner_protocol != htons(ETH_P_TEB) ||
316 (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
317 sizeof(struct udphdr) + sizeof(struct vxlanhdr)) ||
318 (skb->ip_summed != CHECKSUM_NONE &&
319 !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto))))
320 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
321
322 return features;
323}
324
325
/* 20 + 8 + 8 + 14 bytes; presumably IPv4 + UDP + VXLAN + Ethernet headers. */
#define VXLAN_HEADROOM	(20 + 8 + 8 + 14)

/* Same sum with 40 in place of 20; presumably the IPv6 header variant. */
#define VXLAN6_HEADROOM	(40 + 8 + 8 + 14)
329
330static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
331{
332 return (struct vxlanhdr *)(udp_hdr(skb) + 1);
333}
334
335static inline __be32 vxlan_vni(__be32 vni_field)
336{
337#if defined(__BIG_ENDIAN)
338 return (__force __be32)((__force u32)vni_field >> 8);
339#else
340 return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8);
341#endif
342}
343
344static inline __be32 vxlan_vni_field(__be32 vni)
345{
346#if defined(__BIG_ENDIAN)
347 return (__force __be32)((__force u32)vni << 8);
348#else
349 return (__force __be32)((__force u32)vni >> 8);
350#endif
351}
352
353static inline size_t vxlan_rco_start(__be32 vni_field)
354{
355 return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
356}
357
358static inline size_t vxlan_rco_offset(__be32 vni_field)
359{
360 return (vni_field & VXLAN_RCO_UDP) ?
361 offsetof(struct udphdr, check) :
362 offsetof(struct tcphdr, check);
363}
364
365static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset)
366{
367 __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT);
368
369 if (offset == offsetof(struct udphdr, check))
370 vni_field |= VXLAN_RCO_UDP;
371 return vni_field;
372}
373
374static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
375{
376 return vs->sock->sk->sk_family;
377}
378
379#if IS_ENABLED(CONFIG_IPV6)
380
381static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
382{
383 if (ipa->sa.sa_family == AF_INET6)
384 return ipv6_addr_any(&ipa->sin6.sin6_addr);
385 else
386 return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
387}
388
389static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
390{
391 if (ipa->sa.sa_family == AF_INET6)
392 return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
393 else
394 return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
395}
396
397#else
398
399static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
400{
401 return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
402}
403
404static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
405{
406 return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
407}
408
409#endif
410
411static inline bool netif_is_vxlan(const struct net_device *dev)
412{
413 return dev->rtnl_link_ops &&
414 !strcmp(dev->rtnl_link_ops->kind, "vxlan");
415}
416
417struct switchdev_notifier_vxlan_fdb_info {
418 struct switchdev_notifier_info info;
419 union vxlan_addr remote_ip;
420 __be16 remote_port;
421 __be32 remote_vni;
422 u32 remote_ifindex;
423 u8 eth_addr[ETH_ALEN];
424 __be32 vni;
425 bool offloaded;
426 bool added_by_user;
427};
428
429#if IS_ENABLED(CONFIG_VXLAN)
430int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
431 struct switchdev_notifier_vxlan_fdb_info *fdb_info);
432int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
433 struct notifier_block *nb,
434 struct netlink_ext_ack *extack);
435void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni);
436
437#else
438static inline int
439vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
440 struct switchdev_notifier_vxlan_fdb_info *fdb_info)
441{
442 return -ENOENT;
443}
444
445static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
446 struct notifier_block *nb,
447 struct netlink_ext_ack *extack)
448{
449 return -EOPNOTSUPP;
450}
451
452static inline void
453vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
454{
455}
456#endif
457
458static inline void vxlan_flag_attr_error(int attrtype,
459 struct netlink_ext_ack *extack)
460{
461#define VXLAN_FLAG(flg) \
462 case IFLA_VXLAN_##flg: \
463 NL_SET_ERR_MSG_MOD(extack, \
464 "cannot change " #flg " flag"); \
465 break
466 switch (attrtype) {
467 VXLAN_FLAG(TTL_INHERIT);
468 VXLAN_FLAG(LEARNING);
469 VXLAN_FLAG(PROXY);
470 VXLAN_FLAG(RSC);
471 VXLAN_FLAG(L2MISS);
472 VXLAN_FLAG(L3MISS);
473 VXLAN_FLAG(COLLECT_METADATA);
474 VXLAN_FLAG(UDP_ZERO_CSUM6_TX);
475 VXLAN_FLAG(UDP_ZERO_CSUM6_RX);
476 VXLAN_FLAG(REMCSUM_TX);
477 VXLAN_FLAG(REMCSUM_RX);
478 VXLAN_FLAG(GBP);
479 VXLAN_FLAG(GPE);
480 VXLAN_FLAG(REMCSUM_NOPARTIAL);
481 default:
482 NL_SET_ERR_MSG_MOD(extack, \
483 "cannot change flag");
484 break;
485 }
486#undef VXLAN_FLAG
487}
488
489#endif
490