qemu/qemu-bridge-helper.c
<<
>>
Prefs
   1/*
   2 * QEMU Bridge Helper
   3 *
   4 * Copyright IBM, Corp. 2011
   5 *
   6 * Authors:
   7 * Anthony Liguori   <aliguori@us.ibm.com>
   8 * Richa Marwaha     <rmarwah@linux.vnet.ibm.com>
   9 * Corey Bryant      <coreyb@linux.vnet.ibm.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2.  See
  12 * the COPYING file in the top-level directory.
  13 */
  14
  15/*
  16 * Known shortcomings:
  17 * - There is no manual page
  18 * - The syntax of the ACL file is not documented anywhere
  19 * - parse_acl_file() doesn't report fopen() failure properly, fails
  20 *   to check ferror() after fgets() failure, arbitrarily truncates
  21 *   long lines, handles whitespace inconsistently, error messages
  22 *   don't point to the offending file and line, errors in included
  23 *   files are reported, but otherwise ignored, ...
  24 */
  25
  26#include "qemu/osdep.h"
  27
  28
  29#include <sys/ioctl.h>
  30#include <sys/socket.h>
  31#include <sys/un.h>
  32#include <sys/prctl.h>
  33
  34#include <net/if.h>
  35
  36#include <linux/sockios.h>
  37
  38#ifndef SIOCBRADDIF
  39#include <linux/if_bridge.h>
  40#endif
  41
  42#include "qemu/queue.h"
  43
  44#include "net/tap-linux.h"
  45
  46#ifdef CONFIG_LIBCAP
  47#include <cap-ng.h>
  48#endif
  49
  50#define DEFAULT_ACL_FILE CONFIG_QEMU_CONFDIR "/bridge.conf"
  51
  52enum {
  53    ACL_ALLOW = 0,
  54    ACL_ALLOW_ALL,
  55    ACL_DENY,
  56    ACL_DENY_ALL,
  57};
  58
/* One rule read from an ACL file; rules are chained into an ACLList. */
typedef struct ACLRule {
    /* one of the ACL_* constants */
    int type;
    /* bridge name the rule applies to; only filled in for ACL_ALLOW/ACL_DENY */
    char iface[IFNAMSIZ];
    /* linkage into the owning ACLList */
    QSIMPLEQ_ENTRY(ACLRule) entry;
} ACLRule;

/* Queue of ACLRule elements, linked through ACLRule.entry. */
typedef QSIMPLEQ_HEAD(ACLList, ACLRule) ACLList;
  66
  67static void usage(void)
  68{
  69    fprintf(stderr,
  70            "Usage: qemu-bridge-helper [--use-vnet] --br=bridge --fd=unixfd\n");
  71}
  72
  73static int parse_acl_file(const char *filename, ACLList *acl_list)
  74{
  75    FILE *f;
  76    char line[4096];
  77    ACLRule *acl_rule;
  78
  79    f = fopen(filename, "r");
  80    if (f == NULL) {
  81        return -1;
  82    }
  83
  84    while (fgets(line, sizeof(line), f) != NULL) {
  85        char *ptr = line;
  86        char *cmd, *arg, *argend;
  87
  88        while (g_ascii_isspace(*ptr)) {
  89            ptr++;
  90        }
  91
  92        /* skip comments and empty lines */
  93        if (*ptr == '#' || *ptr == 0) {
  94            continue;
  95        }
  96
  97        cmd = ptr;
  98        arg = strchr(cmd, ' ');
  99        if (arg == NULL) {
 100            arg = strchr(cmd, '\t');
 101        }
 102
 103        if (arg == NULL) {
 104            fprintf(stderr, "Invalid config line:\n  %s\n", line);
 105            goto err;
 106        }
 107
 108        *arg = 0;
 109        arg++;
 110        while (g_ascii_isspace(*arg)) {
 111            arg++;
 112        }
 113
 114        argend = arg + strlen(arg);
 115        while (arg != argend && g_ascii_isspace(*(argend - 1))) {
 116            argend--;
 117        }
 118        *argend = 0;
 119
 120        if (!g_str_equal(cmd, "include") && strlen(arg) >= IFNAMSIZ) {
 121            fprintf(stderr, "name `%s' too long: %zu\n", arg, strlen(arg));
 122            goto err;
 123        }
 124
 125        if (strcmp(cmd, "deny") == 0) {
 126            acl_rule = g_malloc(sizeof(*acl_rule));
 127            if (strcmp(arg, "all") == 0) {
 128                acl_rule->type = ACL_DENY_ALL;
 129            } else {
 130                acl_rule->type = ACL_DENY;
 131                snprintf(acl_rule->iface, IFNAMSIZ, "%s", arg);
 132            }
 133            QSIMPLEQ_INSERT_TAIL(acl_list, acl_rule, entry);
 134        } else if (strcmp(cmd, "allow") == 0) {
 135            acl_rule = g_malloc(sizeof(*acl_rule));
 136            if (strcmp(arg, "all") == 0) {
 137                acl_rule->type = ACL_ALLOW_ALL;
 138            } else {
 139                acl_rule->type = ACL_ALLOW;
 140                snprintf(acl_rule->iface, IFNAMSIZ, "%s", arg);
 141            }
 142            QSIMPLEQ_INSERT_TAIL(acl_list, acl_rule, entry);
 143        } else if (strcmp(cmd, "include") == 0) {
 144            /* ignore errors */
 145            parse_acl_file(arg, acl_list);
 146        } else {
 147            fprintf(stderr, "Unknown command `%s'\n", cmd);
 148            goto err;
 149        }
 150    }
 151
 152    fclose(f);
 153    return 0;
 154
 155err:
 156    fclose(f);
 157    errno = EINVAL;
 158    return -1;
 159
 160}
 161
 162static bool has_vnet_hdr(int fd)
 163{
 164    unsigned int features = 0;
 165
 166    if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
 167        return false;
 168    }
 169
 170    if (!(features & IFF_VNET_HDR)) {
 171        return false;
 172    }
 173
 174    return true;
 175}
 176
 177static void prep_ifreq(struct ifreq *ifr, const char *ifname)
 178{
 179    memset(ifr, 0, sizeof(*ifr));
 180    snprintf(ifr->ifr_name, IFNAMSIZ, "%s", ifname);
 181}
 182
 183static int send_fd(int c, int fd)
 184{
 185    char msgbuf[CMSG_SPACE(sizeof(fd))];
 186    struct msghdr msg = {
 187        .msg_control = msgbuf,
 188        .msg_controllen = sizeof(msgbuf),
 189    };
 190    struct cmsghdr *cmsg;
 191    struct iovec iov;
 192    char req[1] = { 0x00 };
 193
 194    cmsg = CMSG_FIRSTHDR(&msg);
 195    cmsg->cmsg_level = SOL_SOCKET;
 196    cmsg->cmsg_type = SCM_RIGHTS;
 197    cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
 198    msg.msg_controllen = cmsg->cmsg_len;
 199
 200    iov.iov_base = req;
 201    iov.iov_len = sizeof(req);
 202
 203    msg.msg_iov = &iov;
 204    msg.msg_iovlen = 1;
 205    memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
 206
 207    return sendmsg(c, &msg, 0);
 208}
 209
 210#ifdef CONFIG_LIBCAP
 211static int drop_privileges(void)
 212{
 213    /* clear all capabilities */
 214    capng_clear(CAPNG_SELECT_BOTH);
 215
 216    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
 217                     CAP_NET_ADMIN) < 0) {
 218        return -1;
 219    }
 220
 221    /* change to calling user's real uid and gid, retaining supplemental
 222     * groups and CAP_NET_ADMIN */
 223    if (capng_change_id(getuid(), getgid(), CAPNG_CLEAR_BOUNDING)) {
 224        return -1;
 225    }
 226
 227    return 0;
 228}
 229#endif
 230
 231int main(int argc, char **argv)
 232{
 233    struct ifreq ifr;
 234#ifndef SIOCBRADDIF
 235    unsigned long ifargs[4];
 236#endif
 237    int ifindex;
 238    int fd = -1, ctlfd = -1, unixfd = -1;
 239    int use_vnet = 0;
 240    int mtu;
 241    const char *bridge = NULL;
 242    char iface[IFNAMSIZ];
 243    int index;
 244    ACLRule *acl_rule;
 245    ACLList acl_list;
 246    int access_allowed, access_denied;
 247    int ret = EXIT_SUCCESS;
 248
 249#ifdef CONFIG_LIBCAP
 250    /* if we're run from an suid binary, immediately drop privileges preserving
 251     * cap_net_admin */
 252    if (geteuid() == 0 && getuid() != geteuid()) {
 253        if (drop_privileges() == -1) {
 254            fprintf(stderr, "failed to drop privileges\n");
 255            return 1;
 256        }
 257    }
 258#endif
 259
 260    /* parse arguments */
 261    for (index = 1; index < argc; index++) {
 262        if (strcmp(argv[index], "--use-vnet") == 0) {
 263            use_vnet = 1;
 264        } else if (strncmp(argv[index], "--br=", 5) == 0) {
 265            bridge = &argv[index][5];
 266        } else if (strncmp(argv[index], "--fd=", 5) == 0) {
 267            unixfd = atoi(&argv[index][5]);
 268        } else {
 269            usage();
 270            return EXIT_FAILURE;
 271        }
 272    }
 273
 274    if (bridge == NULL || unixfd == -1) {
 275        usage();
 276        return EXIT_FAILURE;
 277    }
 278    if (strlen(bridge) >= IFNAMSIZ) {
 279        fprintf(stderr, "name `%s' too long: %zu\n", bridge, strlen(bridge));
 280        return EXIT_FAILURE;
 281    }
 282
 283    /* parse default acl file */
 284    QSIMPLEQ_INIT(&acl_list);
 285    if (parse_acl_file(DEFAULT_ACL_FILE, &acl_list) == -1) {
 286        fprintf(stderr, "failed to parse default acl file `%s'\n",
 287                DEFAULT_ACL_FILE);
 288        ret = EXIT_FAILURE;
 289        goto cleanup;
 290    }
 291
 292    /* validate bridge against acl -- default policy is to deny
 293     * according acl policy if we have a deny and allow both
 294     * then deny should always win over allow
 295     */
 296    access_allowed = 0;
 297    access_denied = 0;
 298    QSIMPLEQ_FOREACH(acl_rule, &acl_list, entry) {
 299        switch (acl_rule->type) {
 300        case ACL_ALLOW_ALL:
 301            access_allowed = 1;
 302            break;
 303        case ACL_ALLOW:
 304            if (strcmp(bridge, acl_rule->iface) == 0) {
 305                access_allowed = 1;
 306            }
 307            break;
 308        case ACL_DENY_ALL:
 309            access_denied = 1;
 310            break;
 311        case ACL_DENY:
 312            if (strcmp(bridge, acl_rule->iface) == 0) {
 313                access_denied = 1;
 314            }
 315            break;
 316        }
 317    }
 318
 319    if ((access_allowed == 0) || (access_denied == 1)) {
 320        fprintf(stderr, "access denied by acl file\n");
 321        ret = EXIT_FAILURE;
 322        goto cleanup;
 323    }
 324
 325    /* open a socket to use to control the network interfaces */
 326    ctlfd = socket(AF_INET, SOCK_STREAM, 0);
 327    if (ctlfd == -1) {
 328        fprintf(stderr, "failed to open control socket: %s\n", strerror(errno));
 329        ret = EXIT_FAILURE;
 330        goto cleanup;
 331    }
 332
 333    /* open the tap device */
 334    fd = open("/dev/net/tun", O_RDWR);
 335    if (fd == -1) {
 336        fprintf(stderr, "failed to open /dev/net/tun: %s\n", strerror(errno));
 337        ret = EXIT_FAILURE;
 338        goto cleanup;
 339    }
 340
 341    /* request a tap device, disable PI, and add vnet header support if
 342     * requested and it's available. */
 343    prep_ifreq(&ifr, "tap%d");
 344    ifr.ifr_flags = IFF_TAP|IFF_NO_PI;
 345    if (use_vnet && has_vnet_hdr(fd)) {
 346        ifr.ifr_flags |= IFF_VNET_HDR;
 347    }
 348
 349    if (ioctl(fd, TUNSETIFF, &ifr) == -1) {
 350        fprintf(stderr, "failed to create tun device: %s\n", strerror(errno));
 351        ret = EXIT_FAILURE;
 352        goto cleanup;
 353    }
 354
 355    /* save tap device name */
 356    snprintf(iface, sizeof(iface), "%s", ifr.ifr_name);
 357
 358    /* get the mtu of the bridge */
 359    prep_ifreq(&ifr, bridge);
 360    if (ioctl(ctlfd, SIOCGIFMTU, &ifr) == -1) {
 361        fprintf(stderr, "failed to get mtu of bridge `%s': %s\n",
 362                bridge, strerror(errno));
 363        ret = EXIT_FAILURE;
 364        goto cleanup;
 365    }
 366
 367    /* save mtu */
 368    mtu = ifr.ifr_mtu;
 369
 370    /* set the mtu of the interface based on the bridge */
 371    prep_ifreq(&ifr, iface);
 372    ifr.ifr_mtu = mtu;
 373    if (ioctl(ctlfd, SIOCSIFMTU, &ifr) == -1) {
 374        fprintf(stderr, "failed to set mtu of device `%s' to %d: %s\n",
 375                iface, mtu, strerror(errno));
 376        ret = EXIT_FAILURE;
 377        goto cleanup;
 378    }
 379
 380    /* Linux uses the lowest enslaved MAC address as the MAC address of
 381     * the bridge.  Set MAC address to a high value so that it doesn't
 382     * affect the MAC address of the bridge.
 383     */
 384    if (ioctl(ctlfd, SIOCGIFHWADDR, &ifr) < 0) {
 385        fprintf(stderr, "failed to get MAC address of device `%s': %s\n",
 386                iface, strerror(errno));
 387        ret = EXIT_FAILURE;
 388        goto cleanup;
 389    }
 390    ifr.ifr_hwaddr.sa_data[0] = 0xFE;
 391    if (ioctl(ctlfd, SIOCSIFHWADDR, &ifr) < 0) {
 392        fprintf(stderr, "failed to set MAC address of device `%s': %s\n",
 393                iface, strerror(errno));
 394        ret = EXIT_FAILURE;
 395        goto cleanup;
 396    }
 397
 398    /* add the interface to the bridge */
 399    prep_ifreq(&ifr, bridge);
 400    ifindex = if_nametoindex(iface);
 401#ifndef SIOCBRADDIF
 402    ifargs[0] = BRCTL_ADD_IF;
 403    ifargs[1] = ifindex;
 404    ifargs[2] = 0;
 405    ifargs[3] = 0;
 406    ifr.ifr_data = (void *)ifargs;
 407    ret = ioctl(ctlfd, SIOCDEVPRIVATE, &ifr);
 408#else
 409    ifr.ifr_ifindex = ifindex;
 410    ret = ioctl(ctlfd, SIOCBRADDIF, &ifr);
 411#endif
 412    if (ret == -1) {
 413        fprintf(stderr, "failed to add interface `%s' to bridge `%s': %s\n",
 414                iface, bridge, strerror(errno));
 415        ret = EXIT_FAILURE;
 416        goto cleanup;
 417    }
 418
 419    /* bring the interface up */
 420    prep_ifreq(&ifr, iface);
 421    if (ioctl(ctlfd, SIOCGIFFLAGS, &ifr) == -1) {
 422        fprintf(stderr, "failed to get interface flags for `%s': %s\n",
 423                iface, strerror(errno));
 424        ret = EXIT_FAILURE;
 425        goto cleanup;
 426    }
 427
 428    ifr.ifr_flags |= IFF_UP;
 429    if (ioctl(ctlfd, SIOCSIFFLAGS, &ifr) == -1) {
 430        fprintf(stderr, "failed to bring up interface `%s': %s\n",
 431                iface, strerror(errno));
 432        ret = EXIT_FAILURE;
 433        goto cleanup;
 434    }
 435
 436    /* write fd to the domain socket */
 437    if (send_fd(unixfd, fd) == -1) {
 438        fprintf(stderr, "failed to write fd to unix socket: %s\n",
 439                strerror(errno));
 440        ret = EXIT_FAILURE;
 441        goto cleanup;
 442    }
 443
 444    /* ... */
 445
 446    /* profit! */
 447
 448cleanup:
 449    if (fd >= 0) {
 450        close(fd);
 451    }
 452    if (ctlfd >= 0) {
 453        close(ctlfd);
 454    }
 455    while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) {
 456        QSIMPLEQ_REMOVE_HEAD(&acl_list, entry);
 457        g_free(acl_rule);
 458    }
 459
 460    return ret;
 461}
 462