dpdk/drivers/net/tap/tap_netlink.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright 2017 6WIND S.A.
   3 * Copyright 2017 Mellanox Technologies, Ltd
   4 */
   5
   6#include <errno.h>
   7#include <inttypes.h>
   8#include <linux/netlink.h>
   9#include <string.h>
  10#include <sys/socket.h>
  11#include <unistd.h>
  12#include <stdbool.h>
  13
  14#include <rte_malloc.h>
  15#include <tap_netlink.h>
  16#include <rte_random.h>
  17
  18#include "tap_log.h"
  19
  20/* Compatibility with glibc < 2.24 */
  21#ifndef SOL_NETLINK
  22#define SOL_NETLINK     270
  23#endif
  24
  25/* Must be quite large to support dumping a huge list of QDISC or filters. */
  26#define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
  27#define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
  28#define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */
  29
  30struct nested_tail {
  31        struct rtattr *tail;
  32        struct nested_tail *prev;
  33};
  34
  35/**
  36 * Initialize a netlink socket for communicating with the kernel.
  37 *
  38 * @param nl_groups
  39 *   Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for
  40 *   specific netlink multicast groups. Otherwise, no subscription will be made.
  41 *
  42 * @return
  43 *   netlink socket file descriptor on success, -1 otherwise.
  44 */
  45int
  46tap_nl_init(uint32_t nl_groups)
  47{
  48        int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
  49        struct sockaddr_nl local = {
  50                .nl_family = AF_NETLINK,
  51                .nl_groups = nl_groups,
  52        };
  53#ifdef NETLINK_EXT_ACK
  54        int one = 1;
  55#endif
  56
  57        fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
  58        if (fd < 0) {
  59                TAP_LOG(ERR, "Unable to create a netlink socket");
  60                return -1;
  61        }
  62        if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
  63                TAP_LOG(ERR, "Unable to set socket buffer send size");
  64                close(fd);
  65                return -1;
  66        }
  67        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
  68                TAP_LOG(ERR, "Unable to set socket buffer receive size");
  69                close(fd);
  70                return -1;
  71        }
  72
  73#ifdef NETLINK_EXT_ACK
  74        /* Ask for extended ACK response. on older kernel will ignore request. */
  75        setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one));
  76#endif
  77
  78        if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
  79                TAP_LOG(ERR, "Unable to bind to the netlink socket");
  80                close(fd);
  81                return -1;
  82        }
  83        return fd;
  84}
  85
  86/**
  87 * Clean up a netlink socket once all communicating with the kernel is finished.
  88 *
  89 * @param[in] nlsk_fd
  90 *   The netlink socket file descriptor used for communication.
  91 *
  92 * @return
  93 *   0 on success, -1 otherwise.
  94 */
  95int
  96tap_nl_final(int nlsk_fd)
  97{
  98        if (close(nlsk_fd)) {
  99                TAP_LOG(ERR, "Failed to close netlink socket: %s (%d)",
 100                        strerror(errno), errno);
 101                return -1;
 102        }
 103        return 0;
 104}
 105
 106/**
 107 * Send a message to the kernel on the netlink socket.
 108 *
 109 * @param[in] nlsk_fd
 110 *   The netlink socket file descriptor used for communication.
 111 * @param[in] nh
 112 *   The netlink message send to the kernel.
 113 *
 114 * @return
 115 *   the number of sent bytes on success, -1 otherwise.
 116 */
 117int
 118tap_nl_send(int nlsk_fd, struct nlmsghdr *nh)
 119{
 120        int send_bytes;
 121
 122        nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
 123        nh->nlmsg_seq = (uint32_t)rte_rand();
 124
 125retry:
 126        send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0);
 127        if (send_bytes < 0) {
 128                if (errno == EINTR)
 129                        goto retry;
 130
 131                TAP_LOG(ERR, "Failed to send netlink message: %s (%d)",
 132                        strerror(errno), errno);
 133                return -1;
 134        }
 135        return send_bytes;
 136}
 137
 138#ifdef NETLINK_EXT_ACK
 139static const struct nlattr *
 140tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset)
 141{
 142        return (const struct nlattr *)((const char *)nh + NLMSG_SPACE(offset));
 143}
 144
 145static const struct nlattr *
 146tap_nl_attr_next(const struct nlattr *attr)
 147{
 148        return (const struct nlattr *)((const char *)attr
 149                                       + NLMSG_ALIGN(attr->nla_len));
 150}
 151
 152static bool
 153tap_nl_attr_ok(const struct nlattr *attr, int len)
 154{
 155        if (len < (int)sizeof(struct nlattr))
 156                return false; /* missing header */
 157        if (attr->nla_len < sizeof(struct nlattr))
 158                return false; /* attribute length should include itself */
 159        if ((int)attr->nla_len  > len)
 160                return false; /* attribute is truncated */
 161        return true;
 162}
 163
 164
 165/* Decode extended errors from kernel */
 166static void
 167tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err)
 168{
 169        const struct nlattr *attr;
 170        const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len);
 171        size_t hlen = sizeof(*err);
 172
 173        /* no TLVs, no extended response */
 174        if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS))
 175                return;
 176
 177        if (!(nh->nlmsg_flags & NLM_F_CAPPED))
 178                hlen += err->msg.nlmsg_len - NLMSG_HDRLEN;
 179
 180        for (attr = tap_nl_attr_first(nh, hlen);
 181             tap_nl_attr_ok(attr, tail - (const char *)attr);
 182             attr = tap_nl_attr_next(attr)) {
 183                uint16_t type = attr->nla_type & NLA_TYPE_MASK;
 184
 185                if (type == NLMSGERR_ATTR_MSG) {
 186                        const char *msg = (const char *)attr
 187                                + NLMSG_ALIGN(sizeof(*attr));
 188
 189                        if (err->error)
 190                                TAP_LOG(ERR, "%s", msg);
 191                        else
 192
 193                                TAP_LOG(WARNING, "%s", msg);
 194                        break;
 195                }
 196        }
 197}
 198#else
 199/*
 200 * External ACK support was added in Linux kernel 4.17
 201 * on older kernels, just ignore that part of message
 202 */
 203#define tap_nl_dump_ext_ack(nh, err) do { } while (0)
 204#endif
 205
 206/**
 207 * Check that the kernel sends an appropriate ACK in response
 208 * to an tap_nl_send().
 209 *
 210 * @param[in] nlsk_fd
 211 *   The netlink socket file descriptor used for communication.
 212 *
 213 * @return
 214 *   0 on success, -1 otherwise with errno set.
 215 */
 216int
 217tap_nl_recv_ack(int nlsk_fd)
 218{
 219        return tap_nl_recv(nlsk_fd, NULL, NULL);
 220}
 221
 222/**
 223 * Receive a message from the kernel on the netlink socket, following an
 224 * tap_nl_send().
 225 *
 226 * @param[in] nlsk_fd
 227 *   The netlink socket file descriptor used for communication.
 228 * @param[in] cb
 229 *   The callback function to call for each netlink message received.
 230 * @param[in, out] arg
 231 *   Custom arguments for the callback.
 232 *
 233 * @return
 234 *   0 on success, -1 otherwise with errno set.
 235 */
 236int
 237tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
 238{
 239        char buf[BUF_SIZE];
 240        int multipart = 0;
 241        int ret = 0;
 242
 243        do {
 244                struct nlmsghdr *nh;
 245                int recv_bytes;
 246
 247retry:
 248                recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0);
 249                if (recv_bytes < 0) {
 250                        if (errno == EINTR)
 251                                goto retry;
 252                        return -1;
 253                }
 254
 255                for (nh = (struct nlmsghdr *)buf;
 256                     NLMSG_OK(nh, (unsigned int)recv_bytes);
 257                     nh = NLMSG_NEXT(nh, recv_bytes)) {
 258                        if (nh->nlmsg_type == NLMSG_ERROR) {
 259                                struct nlmsgerr *err_data = NLMSG_DATA(nh);
 260
 261                                tap_nl_dump_ext_ack(nh, err_data);
 262                                if (err_data->error < 0) {
 263                                        errno = -err_data->error;
 264                                        return -1;
 265                                }
 266                                /* Ack message. */
 267                                return 0;
 268                        }
 269                        /* Multi-part msgs and their trailing DONE message. */
 270                        if (nh->nlmsg_flags & NLM_F_MULTI) {
 271                                if (nh->nlmsg_type == NLMSG_DONE)
 272                                        return 0;
 273                                multipart = 1;
 274                        }
 275                        if (cb)
 276                                ret = cb(nh, arg);
 277                }
 278        } while (multipart);
 279        return ret;
 280}
 281
 282/**
 283 * Append a netlink attribute to a message.
 284 *
 285 * @param[in, out] nh
 286 *   The netlink message to parse, received from the kernel.
 287 * @param[in] type
 288 *   The type of attribute to append.
 289 * @param[in] data_len
 290 *   The length of the data to append.
 291 * @param[in] data
 292 *   The data to append.
 293 */
 294void
 295tap_nlattr_add(struct nlmsghdr *nh, unsigned short type,
 296           unsigned int data_len, const void *data)
 297{
 298        /* see man 3 rtnetlink */
 299        struct rtattr *rta;
 300
 301        rta = (struct rtattr *)NLMSG_TAIL(nh);
 302        rta->rta_len = RTA_LENGTH(data_len);
 303        rta->rta_type = type;
 304        memcpy(RTA_DATA(rta), data, data_len);
 305        nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
 306}
 307
 308/**
 309 * Append a uint8_t netlink attribute to a message.
 310 *
 311 * @param[in, out] nh
 312 *   The netlink message to parse, received from the kernel.
 313 * @param[in] type
 314 *   The type of attribute to append.
 315 * @param[in] data
 316 *   The data to append.
 317 */
 318void
 319tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
 320{
 321        tap_nlattr_add(nh, type, sizeof(uint8_t), &data);
 322}
 323
 324/**
 325 * Append a uint16_t netlink attribute to a message.
 326 *
 327 * @param[in, out] nh
 328 *   The netlink message to parse, received from the kernel.
 329 * @param[in] type
 330 *   The type of attribute to append.
 331 * @param[in] data
 332 *   The data to append.
 333 */
 334void
 335tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
 336{
 337        tap_nlattr_add(nh, type, sizeof(uint16_t), &data);
 338}
 339
 340/**
 341 * Append a uint16_t netlink attribute to a message.
 342 *
 343 * @param[in, out] nh
 344 *   The netlink message to parse, received from the kernel.
 345 * @param[in] type
 346 *   The type of attribute to append.
 347 * @param[in] data
 348 *   The data to append.
 349 */
 350void
 351tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
 352{
 353        tap_nlattr_add(nh, type, sizeof(uint32_t), &data);
 354}
 355
 356/**
 357 * Start a nested netlink attribute.
 358 * It must be followed later by a call to tap_nlattr_nested_finish().
 359 *
 360 * @param[in, out] msg
 361 *   The netlink message where to edit the nested_tails metadata.
 362 * @param[in] type
 363 *   The nested attribute type to append.
 364 *
 365 * @return
 366 *   -1 if adding a nested netlink attribute failed, 0 otherwise.
 367 */
 368int
 369tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type)
 370{
 371        struct nested_tail *tail;
 372
 373        tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0);
 374        if (!tail) {
 375                TAP_LOG(ERR,
 376                        "Couldn't allocate memory for nested netlink attribute");
 377                return -1;
 378        }
 379
 380        tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);
 381
 382        tap_nlattr_add(&msg->nh, type, 0, NULL);
 383
 384        tail->prev = msg->nested_tails;
 385
 386        msg->nested_tails = tail;
 387
 388        return 0;
 389}
 390
 391/**
 392 * End a nested netlink attribute.
 393 * It follows a call to tap_nlattr_nested_start().
 394 * In effect, it will modify the nested attribute length to include every bytes
 395 * from the nested attribute start, up to here.
 396 *
 397 * @param[in, out] msg
 398 *   The netlink message where to edit the nested_tails metadata.
 399 */
 400void
 401tap_nlattr_nested_finish(struct nlmsg *msg)
 402{
 403        struct nested_tail *tail = msg->nested_tails;
 404
 405        tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail;
 406
 407        if (tail->prev)
 408                msg->nested_tails = tail->prev;
 409
 410        rte_free(tail);
 411}
 412