1
2
3
4
5
6#include <errno.h>
7#include <linux/if_link.h>
8#include <linux/rtnetlink.h>
9#include <linux/genetlink.h>
10#include <net/if.h>
11#include <rdma/rdma_netlink.h>
12#include <stdbool.h>
13#include <stdint.h>
14#include <stdlib.h>
15#include <stdalign.h>
16#include <string.h>
17#include <sys/socket.h>
18#include <unistd.h>
19
20#include <rte_errno.h>
21
22#include "mlx5_nl.h"
23#include "../mlx5_common_log.h"
24#include "mlx5_malloc.h"
25#ifdef HAVE_DEVLINK
26#include <linux/devlink.h>
27#endif
28
29
30
/*
 * Size of a Netlink message buffer.
 * NOTE(review): not referenced in the visible part of this file.
 */
#define MLX5_NL_BUF_SIZE (32 * 1024)

/* Send buffer size threshold checked against SO_SNDBUF in mlx5_nl_init(). */
#define MLX5_SEND_BUF_SIZE 32768

/*
 * Receive buffer size threshold checked against SO_RCVBUF in mlx5_nl_init();
 * also the smallest reply buffer mlx5_nl_recv() allocates.
 */
#define MLX5_RECV_BUF_SIZE 32768

/* Exclusive upper bound on an accepted IFLA_PHYS_PORT_NAME payload length. */
#define MLX5_PHYS_PORT_NAME_MAX 128

/* Name prefix of the VLAN devices created by mlx5_nl_vlan_vmwa_create(). */
#define MLX5_VMWA_VLAN_DEVICE_PFX "evmlx"

/*
 * Address of the first rtattr that follows a struct ndmsg located at @p r.
 * Defined only when not already provided.
 */
#ifndef MLX5_NDA_RTA
#define MLX5_NDA_RTA(r) \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif

/*
 * Address just past the (aligned) current end of Netlink message @p nmsg,
 * viewed as an rtattr. Defined only when not already provided.
 */
#ifndef NLMSG_TAIL
#define NLMSG_TAIL(nmsg) \
	((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
#endif
59
60
61
62
/*
 * Fallback values for RDMA Netlink (nldev) constants. Each one is defined
 * here only when the matching HAVE_* symbol is not set (presumably by the
 * build system when the installed headers already provide it -- confirm).
 */
#ifndef HAVE_RDMA_NL_NLDEV
#define RDMA_NL_NLDEV 5
#endif
#ifndef HAVE_RDMA_NLDEV_CMD_GET
#define RDMA_NLDEV_CMD_GET 1
#endif
#ifndef HAVE_RDMA_NLDEV_CMD_PORT_GET
#define RDMA_NLDEV_CMD_PORT_GET 5
#endif
#ifndef HAVE_RDMA_NLDEV_ATTR_DEV_INDEX
#define RDMA_NLDEV_ATTR_DEV_INDEX 1
#endif
#ifndef HAVE_RDMA_NLDEV_ATTR_DEV_NAME
#define RDMA_NLDEV_ATTR_DEV_NAME 2
#endif
#ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
#define RDMA_NLDEV_ATTR_PORT_INDEX 3
#endif
#ifndef HAVE_RDMA_NLDEV_ATTR_PORT_STATE
#define RDMA_NLDEV_ATTR_PORT_STATE 12
#endif
#ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
#define RDMA_NLDEV_ATTR_NDEV_INDEX 50
#endif

/* Fallback values for rtnetlink link attributes, guarded the same way. */
#ifndef HAVE_IFLA_NUM_VF
#define IFLA_NUM_VF 21
#endif
#ifndef HAVE_IFLA_EXT_MASK
#define IFLA_EXT_MASK 29
#endif
#ifndef HAVE_IFLA_PHYS_SWITCH_ID
#define IFLA_PHYS_SWITCH_ID 36
#endif
#ifndef HAVE_IFLA_PHYS_PORT_NAME
#define IFLA_PHYS_PORT_NAME 38
#endif

/*
 * Fallback values for devlink generic Netlink constants, each defined only
 * when the symbol itself is not already a macro.
 */
#ifndef DEVLINK_GENL_NAME
#define DEVLINK_GENL_NAME "devlink"
#endif
#ifndef DEVLINK_GENL_VERSION
#define DEVLINK_GENL_VERSION 1
#endif
#ifndef DEVLINK_ATTR_BUS_NAME
#define DEVLINK_ATTR_BUS_NAME 1
#endif
#ifndef DEVLINK_ATTR_DEV_NAME
#define DEVLINK_ATTR_DEV_NAME 2
#endif
#ifndef DEVLINK_ATTR_PARAM
#define DEVLINK_ATTR_PARAM 80
#endif
#ifndef DEVLINK_ATTR_PARAM_NAME
#define DEVLINK_ATTR_PARAM_NAME 81
#endif
#ifndef DEVLINK_ATTR_PARAM_TYPE
#define DEVLINK_ATTR_PARAM_TYPE 83
#endif
#ifndef DEVLINK_ATTR_PARAM_VALUES_LIST
#define DEVLINK_ATTR_PARAM_VALUES_LIST 84
#endif
#ifndef DEVLINK_ATTR_PARAM_VALUE
#define DEVLINK_ATTR_PARAM_VALUE 85
#endif
#ifndef DEVLINK_ATTR_PARAM_VALUE_DATA
#define DEVLINK_ATTR_PARAM_VALUE_DATA 86
#endif
#ifndef DEVLINK_ATTR_PARAM_VALUE_CMODE
#define DEVLINK_ATTR_PARAM_VALUE_CMODE 87
#endif
#ifndef DEVLINK_PARAM_CMODE_DRIVERINIT
#define DEVLINK_PARAM_CMODE_DRIVERINIT 1
#endif
#ifndef DEVLINK_CMD_RELOAD
#define DEVLINK_CMD_RELOAD 37
#endif
#ifndef DEVLINK_CMD_PARAM_GET
#define DEVLINK_CMD_PARAM_GET 38
#endif
#ifndef DEVLINK_CMD_PARAM_SET
#define DEVLINK_CMD_PARAM_SET 39
#endif
#ifndef NLA_FLAG
#define NLA_FLAG 6
#endif
154
155
/* Output context filled by mlx5_nl_mac_addr_cb() while dumping addresses. */
struct mlx5_nl_mac_addr {
	/* Destination array; entries are written at index mac_n. */
	struct rte_ether_addr (*mac)[];

	/* Number of entries stored in the array so far. */
	int mac_n;
};
161
/* Bits of mlx5_nl_port_info.flags recording which attributes were seen. */
#define MLX5_NL_CMD_GET_IB_NAME (1 << 0) /* Device name matched. */
#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1) /* ibindex is valid. */
#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2) /* ifindex is valid. */
#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3) /* portnum is valid. */
#define MLX5_NL_CMD_GET_PORT_STATE (1 << 4) /* state is valid. */

/* Query context and result used by mlx5_nl_cmdget_cb(). */
struct mlx5_nl_port_info {
	const char *name; /* Device name compared with RDMA_NLDEV_ATTR_DEV_NAME. */
	uint32_t flags; /* MLX5_NL_CMD_GET_* bits. */
	uint32_t ibindex; /* Value of RDMA_NLDEV_ATTR_DEV_INDEX. */
	uint32_t ifindex; /* Value of RDMA_NLDEV_ATTR_NDEV_INDEX. */
	uint32_t portnum; /* Value of RDMA_NLDEV_ATTR_PORT_INDEX. */
	uint16_t state; /* Value of RDMA_NLDEV_ATTR_PORT_STATE (read as one byte). */
};
177
/* Counter backing the Netlink message sequence numbers. */
uint32_t atomic_sn;

/* Atomically increment atomic_sn (relaxed ordering) and yield the new value. */
#define MLX5_NL_SN_GENERATE __atomic_add_fetch(&atomic_sn, 1, __ATOMIC_RELAXED)
182
183
184
185
186
187
188
189
190
191
192
193
194
195int
196mlx5_nl_init(int protocol, int groups)
197{
198 int fd;
199 int buf_size;
200 socklen_t opt_size;
201 struct sockaddr_nl local = {
202 .nl_family = AF_NETLINK,
203 .nl_groups = groups,
204 };
205 int ret;
206
207 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
208 if (fd == -1) {
209 rte_errno = errno;
210 return -rte_errno;
211 }
212 opt_size = sizeof(buf_size);
213 ret = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf_size, &opt_size);
214 if (ret == -1) {
215 rte_errno = errno;
216 goto error;
217 }
218 DRV_LOG(DEBUG, "Netlink socket send buffer: %d", buf_size);
219 if (buf_size < MLX5_SEND_BUF_SIZE) {
220 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
221 &buf_size, sizeof(buf_size));
222 if (ret == -1) {
223 rte_errno = errno;
224 goto error;
225 }
226 }
227 opt_size = sizeof(buf_size);
228 ret = getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf_size, &opt_size);
229 if (ret == -1) {
230 rte_errno = errno;
231 goto error;
232 }
233 DRV_LOG(DEBUG, "Netlink socket recv buffer: %d", buf_size);
234 if (buf_size < MLX5_RECV_BUF_SIZE) {
235 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
236 &buf_size, sizeof(buf_size));
237 if (ret == -1) {
238 rte_errno = errno;
239 goto error;
240 }
241 }
242 ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
243 if (ret == -1) {
244 rte_errno = errno;
245 goto error;
246 }
247 return fd;
248error:
249 close(fd);
250 return -rte_errno;
251}
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271static int
272mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
273 int len)
274{
275 struct sockaddr_nl sa = {
276 .nl_family = AF_NETLINK,
277 };
278 struct iovec iov[2] = {
279 { .iov_base = nh, .iov_len = sizeof(*nh), },
280 { .iov_base = req, .iov_len = len, },
281 };
282 struct msghdr msg = {
283 .msg_name = &sa,
284 .msg_namelen = sizeof(sa),
285 .msg_iov = iov,
286 .msg_iovlen = 2,
287 };
288 int send_bytes;
289
290 nh->nlmsg_pid = 0;
291 nh->nlmsg_seq = sn;
292 send_bytes = sendmsg(nlsk_fd, &msg, 0);
293 if (send_bytes < 0) {
294 rte_errno = errno;
295 return -rte_errno;
296 }
297 return send_bytes;
298}
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314static int
315mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
316{
317 struct sockaddr_nl sa = {
318 .nl_family = AF_NETLINK,
319 };
320 struct iovec iov = {
321 .iov_base = nh,
322 .iov_len = nh->nlmsg_len,
323 };
324 struct msghdr msg = {
325 .msg_name = &sa,
326 .msg_namelen = sizeof(sa),
327 .msg_iov = &iov,
328 .msg_iovlen = 1,
329 };
330 int send_bytes;
331
332 nh->nlmsg_pid = 0;
333 nh->nlmsg_seq = sn;
334 send_bytes = sendmsg(nlsk_fd, &msg, 0);
335 if (send_bytes < 0) {
336 rte_errno = errno;
337 return -rte_errno;
338 }
339 return send_bytes;
340}
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358static int
359mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
360 void *arg)
361{
362 struct sockaddr_nl sa;
363 struct iovec iov;
364 struct msghdr msg = {
365 .msg_name = &sa,
366 .msg_namelen = sizeof(sa),
367 .msg_iov = &iov,
368
369 .msg_iovlen = 1,
370 };
371 void *buf = NULL;
372 int multipart = 0;
373 int ret = 0;
374
375 do {
376 struct nlmsghdr *nh;
377 int recv_bytes;
378
379 do {
380
381 iov.iov_base = NULL;
382 iov.iov_len = 0;
383 recv_bytes = recvmsg(nlsk_fd, &msg,
384 MSG_PEEK | MSG_TRUNC);
385 if (recv_bytes < 0) {
386 rte_errno = errno;
387 ret = -rte_errno;
388 goto exit;
389 }
390 if (recv_bytes == 0) {
391 rte_errno = ENODATA;
392 ret = -rte_errno;
393 goto exit;
394 }
395
396 if (recv_bytes < MLX5_RECV_BUF_SIZE)
397 recv_bytes = MLX5_RECV_BUF_SIZE;
398 mlx5_free(buf);
399 buf = mlx5_malloc(0, recv_bytes, 0, SOCKET_ID_ANY);
400 if (!buf) {
401 rte_errno = ENOMEM;
402 ret = -rte_errno;
403 goto exit;
404 }
405
406 iov.iov_base = buf;
407 iov.iov_len = recv_bytes;
408 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
409 if (recv_bytes == -1) {
410 rte_errno = errno;
411 ret = -rte_errno;
412 goto exit;
413 }
414 nh = (struct nlmsghdr *)buf;
415 } while (nh->nlmsg_seq != sn);
416 for (;
417 NLMSG_OK(nh, (unsigned int)recv_bytes);
418 nh = NLMSG_NEXT(nh, recv_bytes)) {
419 if (nh->nlmsg_type == NLMSG_ERROR) {
420 struct nlmsgerr *err_data = NLMSG_DATA(nh);
421
422 if (err_data->error < 0) {
423 rte_errno = -err_data->error;
424 ret = -rte_errno;
425 goto exit;
426 }
427
428 ret = 0;
429 goto exit;
430 }
431
432 if (nh->nlmsg_flags & NLM_F_MULTI) {
433 if (nh->nlmsg_type == NLMSG_DONE) {
434 ret = 0;
435 goto exit;
436 }
437 multipart = 1;
438 }
439 if (cb) {
440 ret = cb(nh, arg);
441 if (ret < 0)
442 goto exit;
443 }
444 }
445 } while (multipart);
446exit:
447 mlx5_free(buf);
448 return ret;
449}
450
451
452
453
454
455
456
457
458
459
460
461
462static int
463mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
464{
465 struct mlx5_nl_mac_addr *data = arg;
466 struct ndmsg *r = NLMSG_DATA(nh);
467 struct rtattr *attribute;
468 int len;
469
470 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
471 for (attribute = MLX5_NDA_RTA(r);
472 RTA_OK(attribute, len);
473 attribute = RTA_NEXT(attribute, len)) {
474 if (attribute->rta_type == NDA_LLADDR) {
475 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
476 DRV_LOG(WARNING,
477 "not enough room to finalize the"
478 " request");
479 rte_errno = ENOMEM;
480 return -rte_errno;
481 }
482#ifdef RTE_LIBRTE_MLX5_DEBUG
483 char m[RTE_ETHER_ADDR_FMT_SIZE];
484
485 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE,
486 RTA_DATA(attribute));
487 DRV_LOG(DEBUG, "bridge MAC address %s", m);
488#endif
489 memcpy(&(*data->mac)[data->mac_n++],
490 RTA_DATA(attribute), RTE_ETHER_ADDR_LEN);
491 }
492 }
493 return 0;
494}
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512static int
513mlx5_nl_mac_addr_list(int nlsk_fd, unsigned int iface_idx,
514 struct rte_ether_addr (*mac)[], int *mac_n)
515{
516 struct {
517 struct nlmsghdr hdr;
518 struct ifinfomsg ifm;
519 } req = {
520 .hdr = {
521 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
522 .nlmsg_type = RTM_GETNEIGH,
523 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
524 },
525 .ifm = {
526 .ifi_family = PF_BRIDGE,
527 .ifi_index = iface_idx,
528 },
529 };
530 struct mlx5_nl_mac_addr data = {
531 .mac = mac,
532 .mac_n = 0,
533 };
534 uint32_t sn = MLX5_NL_SN_GENERATE;
535 int ret;
536
537 if (nlsk_fd == -1)
538 return 0;
539 ret = mlx5_nl_request(nlsk_fd, &req.hdr, sn, &req.ifm,
540 sizeof(struct ifinfomsg));
541 if (ret < 0)
542 goto error;
543 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_mac_addr_cb, &data);
544 if (ret < 0)
545 goto error;
546 *mac_n = data.mac_n;
547 return 0;
548error:
549 DRV_LOG(DEBUG, "Interface %u cannot retrieve MAC address list %s",
550 iface_idx, strerror(rte_errno));
551 return -rte_errno;
552}
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569static int
570mlx5_nl_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
571 struct rte_ether_addr *mac, int add)
572{
573 struct {
574 struct nlmsghdr hdr;
575 struct ndmsg ndm;
576 struct rtattr rta;
577 uint8_t buffer[RTE_ETHER_ADDR_LEN];
578 } req = {
579 .hdr = {
580 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
581 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
582 NLM_F_EXCL | NLM_F_ACK,
583 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
584 },
585 .ndm = {
586 .ndm_family = PF_BRIDGE,
587 .ndm_state = NUD_NOARP | NUD_PERMANENT,
588 .ndm_ifindex = iface_idx,
589 .ndm_flags = NTF_SELF,
590 },
591 .rta = {
592 .rta_type = NDA_LLADDR,
593 .rta_len = RTA_LENGTH(RTE_ETHER_ADDR_LEN),
594 },
595 };
596 uint32_t sn = MLX5_NL_SN_GENERATE;
597 int ret;
598
599 if (nlsk_fd == -1)
600 return 0;
601 memcpy(RTA_DATA(&req.rta), mac, RTE_ETHER_ADDR_LEN);
602 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
603 RTA_ALIGN(req.rta.rta_len);
604 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
605 if (ret < 0)
606 goto error;
607 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
608 if (ret < 0)
609 goto error;
610 return 0;
611error:
612#ifdef RTE_LIBRTE_MLX5_DEBUG
613 {
614 char m[RTE_ETHER_ADDR_FMT_SIZE];
615
616 rte_ether_format_addr(m, RTE_ETHER_ADDR_FMT_SIZE, mac);
617 DRV_LOG(DEBUG,
618 "Interface %u cannot %s MAC address %s %s",
619 iface_idx,
620 add ? "add" : "remove", m, strerror(rte_errno));
621 }
622#endif
623 return -rte_errno;
624}
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641int
642mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
643 struct rte_ether_addr *mac, int vf_index)
644{
645 int ret;
646 struct {
647 struct nlmsghdr hdr;
648 struct ifinfomsg ifm;
649 struct rtattr vf_list_rta;
650 struct rtattr vf_info_rta;
651 struct rtattr vf_mac_rta;
652 struct ifla_vf_mac ivm;
653 } req = {
654 .hdr = {
655 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
656 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
657 .nlmsg_type = RTM_BASE,
658 },
659 .ifm = {
660 .ifi_index = iface_idx,
661 },
662 .vf_list_rta = {
663 .rta_type = IFLA_VFINFO_LIST,
664 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
665 },
666 .vf_info_rta = {
667 .rta_type = IFLA_VF_INFO,
668 .rta_len = RTA_ALIGN(RTA_LENGTH(0)),
669 },
670 .vf_mac_rta = {
671 .rta_type = IFLA_VF_MAC,
672 },
673 };
674 struct ifla_vf_mac ivm = {
675 .vf = vf_index,
676 };
677 uint32_t sn = MLX5_NL_SN_GENERATE;
678
679 memcpy(&ivm.mac, mac, RTE_ETHER_ADDR_LEN);
680 memcpy(RTA_DATA(&req.vf_mac_rta), &ivm, sizeof(ivm));
681
682 req.vf_mac_rta.rta_len = RTA_LENGTH(sizeof(ivm));
683 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
684 RTA_ALIGN(req.vf_list_rta.rta_len) +
685 RTA_ALIGN(req.vf_info_rta.rta_len) +
686 RTA_ALIGN(req.vf_mac_rta.rta_len);
687 req.vf_list_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
688 &req.vf_list_rta);
689 req.vf_info_rta.rta_len = RTE_PTR_DIFF(NLMSG_TAIL(&req.hdr),
690 &req.vf_info_rta);
691
692 if (nlsk_fd < 0)
693 return -1;
694 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
695 if (ret < 0)
696 goto error;
697 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
698 if (ret < 0)
699 goto error;
700 return 0;
701error:
702 DRV_LOG(ERR,
703 "representor %u cannot set VF MAC address "
704 RTE_ETHER_ADDR_PRT_FMT " : %s",
705 vf_index,
706 RTE_ETHER_ADDR_BYTES(mac),
707 strerror(rte_errno));
708 return -rte_errno;
709}
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728int
729mlx5_nl_mac_addr_add(int nlsk_fd, unsigned int iface_idx,
730 uint64_t *mac_own, struct rte_ether_addr *mac,
731 uint32_t index)
732{
733 int ret;
734
735 ret = mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 1);
736 if (!ret) {
737 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
738 if (index >= MLX5_MAX_MAC_ADDRESSES)
739 return -EINVAL;
740
741 BITFIELD_SET(mac_own, index);
742 }
743 if (ret == -EEXIST)
744 return 0;
745 return ret;
746}
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765int
766mlx5_nl_mac_addr_remove(int nlsk_fd, unsigned int iface_idx, uint64_t *mac_own,
767 struct rte_ether_addr *mac, uint32_t index)
768{
769 MLX5_ASSERT(index < MLX5_MAX_MAC_ADDRESSES);
770 if (index >= MLX5_MAX_MAC_ADDRESSES)
771 return -EINVAL;
772
773 BITFIELD_RESET(mac_own, index);
774 return mlx5_nl_mac_addr_modify(nlsk_fd, iface_idx, mac, 0);
775}
776
777
778
779
780
781
782
783
784
785
786
787
788
789void
790mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx,
791 struct rte_ether_addr *mac_addrs, int n)
792{
793 struct rte_ether_addr macs[n];
794 int macs_n = 0;
795 int i;
796 int ret;
797
798 memset(macs, 0, n * sizeof(macs[0]));
799 ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n);
800 if (ret)
801 return;
802 for (i = 0; i != macs_n; ++i) {
803 int j;
804
805
806 for (j = 0; j != n; ++j)
807 if (rte_is_same_ether_addr(&macs[i], &mac_addrs[j]))
808 break;
809 if (j != n)
810 continue;
811 if (rte_is_multicast_ether_addr(&macs[i])) {
812
813 for (j = MLX5_MAX_UC_MAC_ADDRESSES; j != n; ++j) {
814 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
815 mac_addrs[j] = macs[i];
816 break;
817 }
818 }
819 } else {
820
821 for (j = 0; j != MLX5_MAX_UC_MAC_ADDRESSES; ++j) {
822 if (rte_is_zero_ether_addr(&mac_addrs[j])) {
823 mac_addrs[j] = macs[i];
824 break;
825 }
826 }
827 }
828 }
829}
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845void
846mlx5_nl_mac_addr_flush(int nlsk_fd, unsigned int iface_idx,
847 struct rte_ether_addr *mac_addrs, int n,
848 uint64_t *mac_own)
849{
850 int i;
851
852 if (n <= 0 || n > MLX5_MAX_MAC_ADDRESSES)
853 return;
854
855 for (i = n - 1; i >= 0; --i) {
856 struct rte_ether_addr *m = &mac_addrs[i];
857
858 if (BITFIELD_ISSET(mac_own, i))
859 mlx5_nl_mac_addr_remove(nlsk_fd, iface_idx, mac_own, m,
860 i);
861 }
862}
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879static int
880mlx5_nl_device_flags(int nlsk_fd, unsigned int iface_idx, uint32_t flags,
881 int enable)
882{
883 struct {
884 struct nlmsghdr hdr;
885 struct ifinfomsg ifi;
886 } req = {
887 .hdr = {
888 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
889 .nlmsg_type = RTM_NEWLINK,
890 .nlmsg_flags = NLM_F_REQUEST,
891 },
892 .ifi = {
893 .ifi_flags = enable ? flags : 0,
894 .ifi_change = flags,
895 .ifi_index = iface_idx,
896 },
897 };
898 uint32_t sn = MLX5_NL_SN_GENERATE;
899 int ret;
900
901 MLX5_ASSERT(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
902 if (nlsk_fd < 0)
903 return 0;
904 ret = mlx5_nl_send(nlsk_fd, &req.hdr, sn);
905 if (ret < 0)
906 return ret;
907 return 0;
908}
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923int
924mlx5_nl_promisc(int nlsk_fd, unsigned int iface_idx, int enable)
925{
926 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_PROMISC, enable);
927
928 if (ret)
929 DRV_LOG(DEBUG,
930 "Interface %u cannot %s promisc mode: Netlink error %s",
931 iface_idx, enable ? "enable" : "disable",
932 strerror(rte_errno));
933 return ret;
934}
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949int
950mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
951{
952 int ret = mlx5_nl_device_flags(nlsk_fd, iface_idx, IFF_ALLMULTI,
953 enable);
954
955 if (ret)
956 DRV_LOG(DEBUG,
957 "Interface %u cannot %s allmulti : Netlink error %s",
958 iface_idx, enable ? "enable" : "disable",
959 strerror(rte_errno));
960 return ret;
961}
962
963
964
965
966
967
968
969
970
971
972
973
974static int
975mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
976{
977 struct mlx5_nl_port_info *data = arg;
978 struct mlx5_nl_port_info local = {
979 .flags = 0,
980 };
981 size_t off = NLMSG_HDRLEN;
982
983 if (nh->nlmsg_type !=
984 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
985 nh->nlmsg_type !=
986 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
987 goto error;
988 while (off < nh->nlmsg_len) {
989 struct nlattr *na = (void *)((uintptr_t)nh + off);
990 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
991
992 if (na->nla_len > nh->nlmsg_len - off)
993 goto error;
994 switch (na->nla_type) {
995 case RDMA_NLDEV_ATTR_DEV_INDEX:
996 local.ibindex = *(uint32_t *)payload;
997 local.flags |= MLX5_NL_CMD_GET_IB_INDEX;
998 break;
999 case RDMA_NLDEV_ATTR_DEV_NAME:
1000 if (!strcmp(payload, data->name))
1001 local.flags |= MLX5_NL_CMD_GET_IB_NAME;
1002 break;
1003 case RDMA_NLDEV_ATTR_NDEV_INDEX:
1004 local.ifindex = *(uint32_t *)payload;
1005 local.flags |= MLX5_NL_CMD_GET_NET_INDEX;
1006 break;
1007 case RDMA_NLDEV_ATTR_PORT_INDEX:
1008 local.portnum = *(uint32_t *)payload;
1009 local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
1010 break;
1011 case RDMA_NLDEV_ATTR_PORT_STATE:
1012 local.state = *(uint8_t *)payload;
1013 local.flags |= MLX5_NL_CMD_GET_PORT_STATE;
1014 break;
1015 default:
1016 break;
1017 }
1018 off += NLA_ALIGN(na->nla_len);
1019 }
1020
1021
1022
1023
1024
1025
1026 if (local.flags & MLX5_NL_CMD_GET_IB_NAME) {
1027 data->flags = local.flags;
1028 data->ibindex = local.ibindex;
1029 data->ifindex = local.ifindex;
1030 data->portnum = local.portnum;
1031 data->state = local.state;
1032 }
1033 return 0;
1034error:
1035 rte_errno = EINVAL;
1036 return -rte_errno;
1037}
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055static int
1056mlx5_nl_port_info(int nl, uint32_t pindex, struct mlx5_nl_port_info *data)
1057{
1058 union {
1059 struct nlmsghdr nh;
1060 uint8_t buf[NLMSG_HDRLEN +
1061 NLA_HDRLEN + NLA_ALIGN(sizeof(data->ibindex)) +
1062 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
1063 } req = {
1064 .nh = {
1065 .nlmsg_len = NLMSG_LENGTH(0),
1066 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1067 RDMA_NLDEV_CMD_GET),
1068 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1069 },
1070 };
1071 struct nlattr *na;
1072 uint32_t sn = MLX5_NL_SN_GENERATE;
1073 int ret;
1074
1075 ret = mlx5_nl_send(nl, &req.nh, sn);
1076 if (ret < 0)
1077 return ret;
1078 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
1079 if (ret < 0)
1080 return ret;
1081 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
1082 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX))
1083 goto error;
1084 data->flags = 0;
1085 sn = MLX5_NL_SN_GENERATE;
1086 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1087 RDMA_NLDEV_CMD_PORT_GET);
1088 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1089 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
1090 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
1091 na->nla_len = NLA_HDRLEN + sizeof(data->ibindex);
1092 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
1093 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1094 &data->ibindex, sizeof(data->ibindex));
1095 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
1096 na->nla_len = NLA_HDRLEN + sizeof(pindex);
1097 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
1098 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
1099 &pindex, sizeof(pindex));
1100 ret = mlx5_nl_send(nl, &req.nh, sn);
1101 if (ret < 0)
1102 return ret;
1103 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
1104 if (ret < 0)
1105 return ret;
1106 if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
1107 !(data->flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1108 !(data->flags & MLX5_NL_CMD_GET_NET_INDEX) ||
1109 !data->ifindex)
1110 goto error;
1111 return 1;
1112error:
1113 rte_errno = ENODEV;
1114 return -rte_errno;
1115}
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134unsigned int
1135mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
1136{
1137 struct mlx5_nl_port_info data = {
1138 .ifindex = 0,
1139 .name = name,
1140 };
1141
1142 if (mlx5_nl_port_info(nl, pindex, &data) < 0)
1143 return 0;
1144 return data.ifindex;
1145}
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163int
1164mlx5_nl_port_state(int nl, const char *name, uint32_t pindex)
1165{
1166 struct mlx5_nl_port_info data = {
1167 .state = 0,
1168 .name = name,
1169 };
1170
1171 if (mlx5_nl_port_info(nl, pindex, &data) < 0)
1172 return -rte_errno;
1173 if ((data.flags & MLX5_NL_CMD_GET_PORT_STATE) == 0) {
1174 rte_errno = ENOTSUP;
1175 return -rte_errno;
1176 }
1177 return (int)data.state;
1178}
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192unsigned int
1193mlx5_nl_portnum(int nl, const char *name)
1194{
1195 struct mlx5_nl_port_info data = {
1196 .flags = 0,
1197 .name = name,
1198 .ifindex = 0,
1199 .portnum = 0,
1200 };
1201 struct nlmsghdr req = {
1202 .nlmsg_len = NLMSG_LENGTH(0),
1203 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1204 RDMA_NLDEV_CMD_GET),
1205 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
1206 };
1207 uint32_t sn = MLX5_NL_SN_GENERATE;
1208 int ret;
1209
1210 ret = mlx5_nl_send(nl, &req, sn);
1211 if (ret < 0)
1212 return 0;
1213 ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
1214 if (ret < 0)
1215 return 0;
1216 if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
1217 !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
1218 !(data.flags & MLX5_NL_CMD_GET_PORT_INDEX)) {
1219 rte_errno = ENODEV;
1220 return 0;
1221 }
1222 if (!data.portnum)
1223 rte_errno = EINVAL;
1224 return data.portnum;
1225}
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241static void
1242mlx5_nl_check_switch_info(bool num_vf_set,
1243 struct mlx5_switch_info *switch_info)
1244{
1245 switch (switch_info->name_type) {
1246 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
1247
1248
1249
1250
1251 switch_info->master = num_vf_set;
1252 break;
1253 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
1254
1255
1256
1257
1258
1259 switch_info->master = num_vf_set;
1260 break;
1261 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
1262
1263 switch_info->master = 1;
1264 break;
1265 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
1266
1267 switch_info->representor = !num_vf_set;
1268 break;
1269 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
1270
1271 case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
1272
1273 case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
1274
1275 switch_info->representor = 1;
1276 break;
1277 }
1278}
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291static int
1292mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
1293{
1294 struct mlx5_switch_info info = {
1295 .master = 0,
1296 .representor = 0,
1297 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
1298 .port_name = 0,
1299 .switch_id = 0,
1300 };
1301 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1302 bool switch_id_set = false;
1303 bool num_vf_set = false;
1304 int len;
1305
1306 if (nh->nlmsg_type != RTM_NEWLINK)
1307 goto error;
1308 while (off < nh->nlmsg_len) {
1309 struct rtattr *ra = (void *)((uintptr_t)nh + off);
1310 void *payload = RTA_DATA(ra);
1311 unsigned int i;
1312
1313 if (ra->rta_len > nh->nlmsg_len - off)
1314 goto error;
1315 switch (ra->rta_type) {
1316 case IFLA_NUM_VF:
1317 num_vf_set = true;
1318 break;
1319 case IFLA_PHYS_PORT_NAME:
1320 len = RTA_PAYLOAD(ra);
1321
1322 if (len > 0 && len < MLX5_PHYS_PORT_NAME_MAX) {
1323 char name[MLX5_PHYS_PORT_NAME_MAX];
1324
1325
1326
1327
1328
1329
1330
1331 memcpy(name, payload, len);
1332 name[len] = 0;
1333 mlx5_translate_port_name(name, &info);
1334 } else {
1335 info.name_type =
1336 MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
1337 }
1338 break;
1339 case IFLA_PHYS_SWITCH_ID:
1340 info.switch_id = 0;
1341 for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
1342 info.switch_id <<= 8;
1343 info.switch_id |= ((uint8_t *)payload)[i];
1344 }
1345 switch_id_set = true;
1346 break;
1347 }
1348 off += RTA_ALIGN(ra->rta_len);
1349 }
1350 if (switch_id_set) {
1351
1352 mlx5_nl_check_switch_info(num_vf_set, &info);
1353 }
1354 MLX5_ASSERT(!(info.master && info.representor));
1355 memcpy(arg, &info, sizeof(info));
1356 return 0;
1357error:
1358 rte_errno = EINVAL;
1359 return -rte_errno;
1360}
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375int
1376mlx5_nl_switch_info(int nl, unsigned int ifindex,
1377 struct mlx5_switch_info *info)
1378{
1379 struct {
1380 struct nlmsghdr nh;
1381 struct ifinfomsg info;
1382 struct rtattr rta;
1383 uint32_t extmask;
1384 } req = {
1385 .nh = {
1386 .nlmsg_len = NLMSG_LENGTH
1387 (sizeof(req.info) +
1388 RTA_LENGTH(sizeof(uint32_t))),
1389 .nlmsg_type = RTM_GETLINK,
1390 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1391 },
1392 .info = {
1393 .ifi_family = AF_UNSPEC,
1394 .ifi_index = ifindex,
1395 },
1396 .rta = {
1397 .rta_type = IFLA_EXT_MASK,
1398 .rta_len = RTA_LENGTH(sizeof(int32_t)),
1399 },
1400 .extmask = RTE_LE32(1),
1401 };
1402 uint32_t sn = MLX5_NL_SN_GENERATE;
1403 int ret;
1404
1405 ret = mlx5_nl_send(nl, &req.nh, sn);
1406 if (ret >= 0)
1407 ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
1408 if (info->master && info->representor) {
1409 DRV_LOG(ERR, "ifindex %u device is recognized as master"
1410 " and as representor", ifindex);
1411 rte_errno = ENODEV;
1412 ret = -rte_errno;
1413 }
1414 return ret;
1415}
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425void
1426mlx5_nl_vlan_vmwa_delete(struct mlx5_nl_vlan_vmwa_context *vmwa,
1427 uint32_t ifindex)
1428{
1429 uint32_t sn = MLX5_NL_SN_GENERATE;
1430 int ret;
1431 struct {
1432 struct nlmsghdr nh;
1433 struct ifinfomsg info;
1434 } req = {
1435 .nh = {
1436 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
1437 .nlmsg_type = RTM_DELLINK,
1438 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
1439 },
1440 .info = {
1441 .ifi_family = AF_UNSPEC,
1442 .ifi_index = ifindex,
1443 },
1444 };
1445
1446 if (ifindex) {
1447 ret = mlx5_nl_send(vmwa->nl_socket, &req.nh, sn);
1448 if (ret >= 0)
1449 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1450 if (ret < 0)
1451 DRV_LOG(WARNING, "netlink: error deleting VLAN WA"
1452 " ifindex %u, %d", ifindex, ret);
1453 }
1454}
1455
1456
/* Return the aligned end of message @p nlh, where the next attribute goes. */
static struct nlattr *
nl_msg_tail(struct nlmsghdr *nlh)
{
	uint8_t *base = (uint8_t *)nlh;

	return (struct nlattr *)(base + NLMSG_ALIGN(nlh->nlmsg_len));
}
1463
/*
 * Append an attribute of the given type to message @p nlh and grow
 * nlmsg_len accordingly. @p alen bytes are copied from @p data unless
 * @p alen is 0. The caller must provide enough room after the message.
 */
static void
nl_attr_put(struct nlmsghdr *nlh, int type, const void *data, int alen)
{
	struct nlattr *attr = nl_msg_tail(nlh);
	uint8_t *dst = (uint8_t *)attr + sizeof(struct nlattr);

	attr->nla_len = NLMSG_ALIGN(sizeof(struct nlattr)) + alen;
	attr->nla_type = type;
	nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
	if (alen != 0)
		memcpy(dst, data, alen);
}
1476
1477static struct nlattr *
1478nl_attr_nest_start(struct nlmsghdr *nlh, int type)
1479{
1480 struct nlattr *nest = (struct nlattr *)nl_msg_tail(nlh);
1481
1482 nl_attr_put(nlh, type, NULL, 0);
1483 return nest;
1484}
1485
/*
 * Close a nested attribute: its length becomes the distance from @p nest to
 * the current end of the message.
 */
static void
nl_attr_nest_end(struct nlmsghdr *nlh, struct nlattr *nest)
{
	uint8_t *end = (uint8_t *)nl_msg_tail(nlh);

	nest->nla_len = end - (uint8_t *)nest;
}
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502uint32_t
1503mlx5_nl_vlan_vmwa_create(struct mlx5_nl_vlan_vmwa_context *vmwa,
1504 uint32_t ifindex, uint16_t tag)
1505{
1506 struct nlmsghdr *nlh;
1507 struct ifinfomsg *ifm;
1508 char name[sizeof(MLX5_VMWA_VLAN_DEVICE_PFX) + 32];
1509
1510 __rte_cache_aligned
1511 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1512 NLMSG_ALIGN(sizeof(struct ifinfomsg)) +
1513 NLMSG_ALIGN(sizeof(struct nlattr)) * 8 +
1514 NLMSG_ALIGN(sizeof(uint32_t)) +
1515 NLMSG_ALIGN(sizeof(name)) +
1516 NLMSG_ALIGN(sizeof("vlan")) +
1517 NLMSG_ALIGN(sizeof(uint32_t)) +
1518 NLMSG_ALIGN(sizeof(uint16_t)) + 16];
1519 struct nlattr *na_info;
1520 struct nlattr *na_vlan;
1521 uint32_t sn = MLX5_NL_SN_GENERATE;
1522 int ret;
1523
1524 memset(buf, 0, sizeof(buf));
1525 nlh = (struct nlmsghdr *)buf;
1526 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1527 nlh->nlmsg_type = RTM_NEWLINK;
1528 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
1529 NLM_F_EXCL | NLM_F_ACK;
1530 ifm = (struct ifinfomsg *)nl_msg_tail(nlh);
1531 nlh->nlmsg_len += sizeof(struct ifinfomsg);
1532 ifm->ifi_family = AF_UNSPEC;
1533 ifm->ifi_type = 0;
1534 ifm->ifi_index = 0;
1535 ifm->ifi_flags = IFF_UP;
1536 ifm->ifi_change = 0xffffffff;
1537 nl_attr_put(nlh, IFLA_LINK, &ifindex, sizeof(ifindex));
1538 ret = snprintf(name, sizeof(name), "%s.%u.%u",
1539 MLX5_VMWA_VLAN_DEVICE_PFX, ifindex, tag);
1540 nl_attr_put(nlh, IFLA_IFNAME, name, ret + 1);
1541 na_info = nl_attr_nest_start(nlh, IFLA_LINKINFO);
1542 nl_attr_put(nlh, IFLA_INFO_KIND, "vlan", sizeof("vlan"));
1543 na_vlan = nl_attr_nest_start(nlh, IFLA_INFO_DATA);
1544 nl_attr_put(nlh, IFLA_VLAN_ID, &tag, sizeof(tag));
1545 nl_attr_nest_end(nlh, na_vlan);
1546 nl_attr_nest_end(nlh, na_info);
1547 MLX5_ASSERT(sizeof(buf) >= nlh->nlmsg_len);
1548 ret = mlx5_nl_send(vmwa->nl_socket, nlh, sn);
1549 if (ret >= 0)
1550 ret = mlx5_nl_recv(vmwa->nl_socket, sn, NULL, NULL);
1551 if (ret < 0) {
1552 DRV_LOG(WARNING, "netlink: VLAN %s create failure (%d)", name,
1553 ret);
1554 }
1555
1556 ret = if_nametoindex(name);
1557 if (!ret) {
1558 DRV_LOG(WARNING, "VLAN %s failed to get index (%d)", name,
1559 errno);
1560 return 0;
1561 }
1562 return ret;
1563}
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
/**
 * Netlink callback extracting the generic Netlink family ID.
 *
 * The attributes following the generic Netlink header are scanned for
 * CTRL_ATTR_FAMILY_ID. The bounds test comes first so that an attribute
 * header is never read at or past the end of the message.
 *
 * @param nh
 *   Netlink message.
 * @param arg
 *   Pointer to the uint16_t receiving the family ID.
 *
 * @return
 *   0 when the attribute is found, -EINVAL otherwise.
 */
static int
mlx5_nl_family_id_cb(struct nlmsghdr *nh, void *arg)
{
	struct nlattr *tail = (struct nlattr *)((uint8_t *)nh + nh->nlmsg_len);
	struct nlattr *nla = (struct nlattr *)((uint8_t *)nh +
				NLMSG_ALIGN(sizeof(*nh)) +
				NLMSG_ALIGN(sizeof(struct genlmsghdr)));

	/* A zero-length attribute also ends the scan (it cannot advance). */
	for (; nla < tail && nla->nla_len;
	     nla = (struct nlattr *)((uint8_t *)nla +
				     NLMSG_ALIGN(nla->nla_len))) {
		if (nla->nla_type == CTRL_ATTR_FAMILY_ID) {
			/* The payload starts right after the attribute header. */
			*(uint16_t *)arg = *(uint16_t *)(nla + 1);
			return 0;
		}
	}
	return -EINVAL;
}
1593
1594#define MLX5_NL_MAX_ATTR_SIZE 100
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607static int
1608mlx5_nl_generic_family_id_get(int nlsk_fd, const char *name)
1609{
1610 struct nlmsghdr *nlh;
1611 struct genlmsghdr *genl;
1612 uint32_t sn = MLX5_NL_SN_GENERATE;
1613 int name_size = strlen(name) + 1;
1614 int ret;
1615 uint16_t id = -1;
1616 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1617 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1618 NLMSG_ALIGN(sizeof(struct nlattr)) +
1619 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE)];
1620
1621 memset(buf, 0, sizeof(buf));
1622 nlh = (struct nlmsghdr *)buf;
1623 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1624 nlh->nlmsg_type = GENL_ID_CTRL;
1625 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1626 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1627 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1628 genl->cmd = CTRL_CMD_GETFAMILY;
1629 genl->version = 1;
1630 nl_attr_put(nlh, CTRL_ATTR_FAMILY_NAME, name, name_size);
1631 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1632 if (ret >= 0)
1633 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_family_id_cb, &id);
1634 if (ret < 0) {
1635 DRV_LOG(DEBUG, "Failed to get Netlink %s family ID: %d.", name,
1636 ret);
1637 return ret;
1638 }
1639 DRV_LOG(DEBUG, "Netlink \"%s\" family ID is %u.", name, id);
1640 return (int)id;
1641}
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
/**
 * Get the Devlink Generic Netlink family ID.
 *
 * @param[in] nlsk_fd
 *   Generic Netlink socket file descriptor.
 *
 * @return
 *   The value returned by mlx5_nl_generic_family_id_get() for the
 *   DEVLINK_GENL_NAME family: the family ID on success, a negative value
 *   otherwise.
 */
int
mlx5_nl_devlink_family_id_get(int nlsk_fd)
{
	const char *const family_name = DEVLINK_GENL_NAME;

	return mlx5_nl_generic_family_id_get(nlsk_fd, family_name);
}
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
/**
 * Netlink callback parsing a Devlink parameter reply for the ROCE flag.
 *
 * The nested DEVLINK_ATTR_PARAM, DEVLINK_ATTR_PARAM_VALUES_LIST and
 * DEVLINK_ATTR_PARAM_VALUE attributes are entered by stepping over their
 * header only; any other attribute is skipped entirely. The presence of a
 * DEVLINK_ATTR_PARAM_VALUE_DATA attribute means the flag is set.
 *
 * @param[in] nh
 *   Pointer to the received Netlink message header.
 * @param[out] arg
 *   Pointer to the int receiving the flag: 1 when the value data attribute
 *   is present, 0 otherwise.
 *
 * @return
 *   0 when at least one parameter attribute was recognized, -EINVAL
 *   otherwise.
 */
static int
mlx5_nl_roce_cb(struct nlmsghdr *nh, void *arg)
{
	int ret = -EINVAL;
	int *enable = arg;
	const uint8_t *base = (const uint8_t *)nh;
	const size_t len = nh->nlmsg_len;
	size_t off = NLMSG_ALIGN(sizeof(*nh)) +
		     NLMSG_ALIGN(sizeof(struct genlmsghdr));

	/*
	 * Check that the attribute header and its declared length fit into
	 * the message before reading any field, a truncated reply must not
	 * be interpreted.
	 */
	while (off < len && len - off >= sizeof(struct nlattr)) {
		const struct nlattr *nla =
			(const struct nlattr *)(base + off);

		if (nla->nla_len < sizeof(*nla) || nla->nla_len > len - off)
			break;
		switch (nla->nla_type) {
		/* Expected nested attributes: step into their payload. */
		case DEVLINK_ATTR_PARAM:
		case DEVLINK_ATTR_PARAM_VALUES_LIST:
		case DEVLINK_ATTR_PARAM_VALUE:
			ret = 0;
			off += sizeof(*nla);
			break;
		case DEVLINK_ATTR_PARAM_VALUE_DATA:
			*enable = 1;
			return 0;
		default:
			off += NLMSG_ALIGN(nla->nla_len);
			break;
		}
	}
	*enable = 0;
	return ret;
}
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717int
1718mlx5_nl_enable_roce_get(int nlsk_fd, int family_id, const char *pci_addr,
1719 int *enable)
1720{
1721 struct nlmsghdr *nlh;
1722 struct genlmsghdr *genl;
1723 uint32_t sn = MLX5_NL_SN_GENERATE;
1724 int ret;
1725 int cur_en = 0;
1726 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1727 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1728 NLMSG_ALIGN(sizeof(struct nlattr)) * 4 +
1729 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 4];
1730
1731 memset(buf, 0, sizeof(buf));
1732 nlh = (struct nlmsghdr *)buf;
1733 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1734 nlh->nlmsg_type = family_id;
1735 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1736 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1737 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1738 genl->cmd = DEVLINK_CMD_PARAM_GET;
1739 genl->version = DEVLINK_GENL_VERSION;
1740 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1741 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1742 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1743 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1744 if (ret >= 0)
1745 ret = mlx5_nl_recv(nlsk_fd, sn, mlx5_nl_roce_cb, &cur_en);
1746 if (ret < 0) {
1747 DRV_LOG(DEBUG, "Failed to get ROCE enable on device %s: %d.",
1748 pci_addr, ret);
1749 return ret;
1750 }
1751 *enable = cur_en;
1752 DRV_LOG(DEBUG, "ROCE is %sabled for device \"%s\".",
1753 cur_en ? "en" : "dis", pci_addr);
1754 return ret;
1755}
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772static int
1773mlx5_nl_driver_reload(int nlsk_fd, int family_id, const char *pci_addr)
1774{
1775 struct nlmsghdr *nlh;
1776 struct genlmsghdr *genl;
1777 uint32_t sn = MLX5_NL_SN_GENERATE;
1778 int ret;
1779 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1780 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1781 NLMSG_ALIGN(sizeof(struct nlattr)) * 2 +
1782 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 2];
1783
1784 memset(buf, 0, sizeof(buf));
1785 nlh = (struct nlmsghdr *)buf;
1786 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1787 nlh->nlmsg_type = family_id;
1788 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1789 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1790 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1791 genl->cmd = DEVLINK_CMD_RELOAD;
1792 genl->version = DEVLINK_GENL_VERSION;
1793 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1794 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1795 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1796 if (ret >= 0)
1797 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1798 if (ret < 0) {
1799 DRV_LOG(DEBUG, "Failed to reload %s device by Netlink - %d",
1800 pci_addr, ret);
1801 return ret;
1802 }
1803 DRV_LOG(DEBUG, "Device \"%s\" was reloaded by Netlink successfully.",
1804 pci_addr);
1805 return 0;
1806}
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823int
1824mlx5_nl_enable_roce_set(int nlsk_fd, int family_id, const char *pci_addr,
1825 int enable)
1826{
1827 struct nlmsghdr *nlh;
1828 struct genlmsghdr *genl;
1829 uint32_t sn = MLX5_NL_SN_GENERATE;
1830 int ret;
1831 uint8_t buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
1832 NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1833 NLMSG_ALIGN(sizeof(struct nlattr)) * 6 +
1834 NLMSG_ALIGN(MLX5_NL_MAX_ATTR_SIZE) * 6];
1835 uint8_t cmode = DEVLINK_PARAM_CMODE_DRIVERINIT;
1836 uint8_t ptype = NLA_FLAG;
1837;
1838
1839 memset(buf, 0, sizeof(buf));
1840 nlh = (struct nlmsghdr *)buf;
1841 nlh->nlmsg_len = sizeof(struct nlmsghdr);
1842 nlh->nlmsg_type = family_id;
1843 nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1844 genl = (struct genlmsghdr *)nl_msg_tail(nlh);
1845 nlh->nlmsg_len += sizeof(struct genlmsghdr);
1846 genl->cmd = DEVLINK_CMD_PARAM_SET;
1847 genl->version = DEVLINK_GENL_VERSION;
1848 nl_attr_put(nlh, DEVLINK_ATTR_BUS_NAME, "pci", 4);
1849 nl_attr_put(nlh, DEVLINK_ATTR_DEV_NAME, pci_addr, strlen(pci_addr) + 1);
1850 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_NAME, "enable_roce", 12);
1851 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, &cmode, sizeof(cmode));
1852 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_TYPE, &ptype, sizeof(ptype));
1853 if (enable)
1854 nl_attr_put(nlh, DEVLINK_ATTR_PARAM_VALUE_DATA, NULL, 0);
1855 ret = mlx5_nl_send(nlsk_fd, nlh, sn);
1856 if (ret >= 0)
1857 ret = mlx5_nl_recv(nlsk_fd, sn, NULL, NULL);
1858 if (ret < 0) {
1859 DRV_LOG(DEBUG, "Failed to %sable ROCE for device %s by Netlink:"
1860 " %d.", enable ? "en" : "dis", pci_addr, ret);
1861 return ret;
1862 }
1863 DRV_LOG(DEBUG, "Device %s ROCE was %sabled by Netlink successfully.",
1864 pci_addr, enable ? "en" : "dis");
1865
1866 return mlx5_nl_driver_reload(nlsk_fd, family_id, pci_addr);
1867}
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
/**
 * Extract the interface index from a link-related Netlink message.
 *
 * Only RTM_NEWLINK, RTM_DELLINK, RTM_GETLINK and RTM_SETLINK messages are
 * accepted, and only when they are long enough to carry struct ifinfomsg.
 *
 * @param[in] hdr
 *   Pointer to the Netlink message header.
 * @param[out] ifindex
 *   Receives the interface index. Written only on success.
 *
 * @return
 *   0 when the index was extracted, -1 for any other message type or for a
 *   message too short to hold the link information.
 */
int
mlx5_nl_parse_link_status_update(struct nlmsghdr *hdr, uint32_t *ifindex)
{
	const struct ifinfomsg *info;

	switch (hdr->nlmsg_type) {
	case RTM_NEWLINK:
	case RTM_DELLINK:
	case RTM_GETLINK:
	case RTM_SETLINK:
		/* Never read the payload of a truncated message. */
		if (hdr->nlmsg_len < NLMSG_LENGTH(sizeof(*info)))
			break;
		info = NLMSG_DATA(hdr);
		*ifindex = (uint32_t)info->ifi_index;
		return 0;
	default:
		break;
	}
	return -1;
}
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911int
1912mlx5_nl_read_events(int nlsk_fd, mlx5_nl_event_cb *cb, void *cb_arg)
1913{
1914 char buf[8192];
1915 struct sockaddr_nl addr;
1916 struct iovec iov = {
1917 .iov_base = buf,
1918 .iov_len = sizeof(buf),
1919 };
1920 struct msghdr msg = {
1921 .msg_name = &addr,
1922 .msg_namelen = sizeof(addr),
1923 .msg_iov = &iov,
1924 .msg_iovlen = 1,
1925 };
1926 struct nlmsghdr *hdr;
1927 ssize_t size;
1928
1929 while (1) {
1930 size = recvmsg(nlsk_fd, &msg, MSG_DONTWAIT);
1931 if (size < 0) {
1932 if (errno == EAGAIN)
1933 return 0;
1934 if (errno == EINTR)
1935 continue;
1936 DRV_LOG(DEBUG, "Failed to receive netlink message: %s",
1937 strerror(errno));
1938 rte_errno = errno;
1939 return -rte_errno;
1940 }
1941 hdr = (struct nlmsghdr *)buf;
1942 while (size >= (ssize_t)sizeof(*hdr)) {
1943 ssize_t msg_len = hdr->nlmsg_len;
1944 ssize_t data_len = msg_len - sizeof(*hdr);
1945 ssize_t aligned_len;
1946
1947 if (data_len < 0) {
1948 DRV_LOG(DEBUG, "Netlink message too short");
1949 rte_errno = EINVAL;
1950 return -rte_errno;
1951 }
1952 aligned_len = NLMSG_ALIGN(msg_len);
1953 if (aligned_len > size) {
1954 DRV_LOG(DEBUG, "Netlink message too long");
1955 rte_errno = EINVAL;
1956 return -rte_errno;
1957 }
1958 cb(hdr, cb_arg);
1959 hdr = RTE_PTR_ADD(hdr, aligned_len);
1960 size -= aligned_len;
1961 }
1962 }
1963 return 0;
1964}
1965