qemu/qemu-bridge-helper.c
<<
>>
Prefs
   1/*
   2 * QEMU Bridge Helper
   3 *
   4 * Copyright IBM, Corp. 2011
   5 *
   6 * Authors:
   7 * Anthony Liguori   <aliguori@us.ibm.com>
   8 * Richa Marwaha     <rmarwah@linux.vnet.ibm.com>
   9 * Corey Bryant      <coreyb@linux.vnet.ibm.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2.  See
  12 * the COPYING file in the top-level directory.
  13 */
  14
  15/*
  16 * Known shortcomings:
  17 * - There is no manual page
  18 * - The syntax of the ACL file is not documented anywhere
  19 * - parse_acl_file() doesn't report fopen() failure properly, fails
  20 *   to check ferror() after fgets() failure, arbitrarily truncates
  21 *   long lines, handles whitespace inconsistently, error messages
  22 *   don't point to the offending file and line, errors in included
  23 *   files are reported, but otherwise ignored, ...
  24 */
  25
  26#include "qemu/osdep.h"
  27
  28
  29#include <sys/ioctl.h>
  30#include <sys/socket.h>
  31#include <sys/un.h>
  32#include <sys/prctl.h>
  33
  34#include <net/if.h>
  35
  36#include <linux/sockios.h>
  37
  38#ifndef SIOCBRADDIF
  39#include <linux/if_bridge.h>
  40#endif
  41
  42#include "qemu/queue.h"
  43#include "qemu/cutils.h"
  44
  45#include "net/tap-linux.h"
  46
  47#ifdef CONFIG_LIBCAP_NG
  48#include <cap-ng.h>
  49#endif
  50
  51#define DEFAULT_ACL_FILE CONFIG_QEMU_CONFDIR "/bridge.conf"
  52
  53enum {
  54    ACL_ALLOW = 0,
  55    ACL_ALLOW_ALL,
  56    ACL_DENY,
  57    ACL_DENY_ALL,
  58};
  59
  60typedef struct ACLRule {
  61    int type;
  62    char iface[IFNAMSIZ];
  63    QSIMPLEQ_ENTRY(ACLRule) entry;
  64} ACLRule;
  65
  66typedef QSIMPLEQ_HEAD(ACLList, ACLRule) ACLList;
  67
  68static void usage(void)
  69{
  70    fprintf(stderr,
  71            "Usage: qemu-bridge-helper [--use-vnet] --br=bridge --fd=unixfd\n");
  72}
  73
  74static int parse_acl_file(const char *filename, ACLList *acl_list)
  75{
  76    FILE *f;
  77    char line[4096];
  78    ACLRule *acl_rule;
  79
  80    f = fopen(filename, "r");
  81    if (f == NULL) {
  82        return -1;
  83    }
  84
  85    while (fgets(line, sizeof(line), f) != NULL) {
  86        char *ptr = line;
  87        char *cmd, *arg, *argend;
  88
  89        while (g_ascii_isspace(*ptr)) {
  90            ptr++;
  91        }
  92
  93        /* skip comments and empty lines */
  94        if (*ptr == '#' || *ptr == 0) {
  95            continue;
  96        }
  97
  98        cmd = ptr;
  99        arg = strchr(cmd, ' ');
 100        if (arg == NULL) {
 101            arg = strchr(cmd, '\t');
 102        }
 103
 104        if (arg == NULL) {
 105            fprintf(stderr, "Invalid config line:\n  %s\n", line);
 106            goto err;
 107        }
 108
 109        *arg = 0;
 110        arg++;
 111        while (g_ascii_isspace(*arg)) {
 112            arg++;
 113        }
 114
 115        argend = arg + strlen(arg);
 116        while (arg != argend && g_ascii_isspace(*(argend - 1))) {
 117            argend--;
 118        }
 119        *argend = 0;
 120
 121        if (!g_str_equal(cmd, "include") && strlen(arg) >= IFNAMSIZ) {
 122            fprintf(stderr, "name `%s' too long: %zu\n", arg, strlen(arg));
 123            goto err;
 124        }
 125
 126        if (strcmp(cmd, "deny") == 0) {
 127            acl_rule = g_malloc(sizeof(*acl_rule));
 128            if (strcmp(arg, "all") == 0) {
 129                acl_rule->type = ACL_DENY_ALL;
 130            } else {
 131                acl_rule->type = ACL_DENY;
 132                snprintf(acl_rule->iface, IFNAMSIZ, "%s", arg);
 133            }
 134            QSIMPLEQ_INSERT_TAIL(acl_list, acl_rule, entry);
 135        } else if (strcmp(cmd, "allow") == 0) {
 136            acl_rule = g_malloc(sizeof(*acl_rule));
 137            if (strcmp(arg, "all") == 0) {
 138                acl_rule->type = ACL_ALLOW_ALL;
 139            } else {
 140                acl_rule->type = ACL_ALLOW;
 141                snprintf(acl_rule->iface, IFNAMSIZ, "%s", arg);
 142            }
 143            QSIMPLEQ_INSERT_TAIL(acl_list, acl_rule, entry);
 144        } else if (strcmp(cmd, "include") == 0) {
 145            /* ignore errors */
 146            parse_acl_file(arg, acl_list);
 147        } else {
 148            fprintf(stderr, "Unknown command `%s'\n", cmd);
 149            goto err;
 150        }
 151    }
 152
 153    fclose(f);
 154    return 0;
 155
 156err:
 157    fclose(f);
 158    errno = EINVAL;
 159    return -1;
 160
 161}
 162
 163static bool has_vnet_hdr(int fd)
 164{
 165    unsigned int features = 0;
 166
 167    if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
 168        return false;
 169    }
 170
 171    if (!(features & IFF_VNET_HDR)) {
 172        return false;
 173    }
 174
 175    return true;
 176}
 177
 178static void prep_ifreq(struct ifreq *ifr, const char *ifname)
 179{
 180    memset(ifr, 0, sizeof(*ifr));
 181    snprintf(ifr->ifr_name, IFNAMSIZ, "%s", ifname);
 182}
 183
 184static int send_fd(int c, int fd)
 185{
 186    char msgbuf[CMSG_SPACE(sizeof(fd))];
 187    struct msghdr msg = {
 188        .msg_control = msgbuf,
 189        .msg_controllen = sizeof(msgbuf),
 190    };
 191    struct cmsghdr *cmsg;
 192    struct iovec iov;
 193    char req[1] = { 0x00 };
 194
 195    cmsg = CMSG_FIRSTHDR(&msg);
 196    cmsg->cmsg_level = SOL_SOCKET;
 197    cmsg->cmsg_type = SCM_RIGHTS;
 198    cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
 199    msg.msg_controllen = cmsg->cmsg_len;
 200
 201    iov.iov_base = req;
 202    iov.iov_len = sizeof(req);
 203
 204    msg.msg_iov = &iov;
 205    msg.msg_iovlen = 1;
 206    memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
 207
 208    return sendmsg(c, &msg, 0);
 209}
 210
 211#ifdef CONFIG_LIBCAP_NG
 212static int drop_privileges(void)
 213{
 214    /* clear all capabilities */
 215    capng_clear(CAPNG_SELECT_BOTH);
 216
 217    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
 218                     CAP_NET_ADMIN) < 0) {
 219        return -1;
 220    }
 221
 222    /* change to calling user's real uid and gid, retaining supplemental
 223     * groups and CAP_NET_ADMIN */
 224    if (capng_change_id(getuid(), getgid(), CAPNG_CLEAR_BOUNDING)) {
 225        return -1;
 226    }
 227
 228    return 0;
 229}
 230#endif
 231
 232int main(int argc, char **argv)
 233{
 234    struct ifreq ifr;
 235#ifndef SIOCBRADDIF
 236    unsigned long ifargs[4];
 237#endif
 238    int ifindex;
 239    int fd = -1, ctlfd = -1, unixfd = -1;
 240    int use_vnet = 0;
 241    int mtu;
 242    const char *bridge = NULL;
 243    char iface[IFNAMSIZ];
 244    int index;
 245    ACLRule *acl_rule;
 246    ACLList acl_list;
 247    int access_allowed, access_denied;
 248    int ret = EXIT_SUCCESS;
 249    g_autofree char *acl_file = NULL;
 250
 251#ifdef CONFIG_LIBCAP_NG
 252    /* if we're run from an suid binary, immediately drop privileges preserving
 253     * cap_net_admin */
 254    if (geteuid() == 0 && getuid() != geteuid()) {
 255        if (drop_privileges() == -1) {
 256            fprintf(stderr, "failed to drop privileges\n");
 257            return 1;
 258        }
 259    }
 260#endif
 261
 262    qemu_init_exec_dir(argv[0]);
 263
 264    /* parse arguments */
 265    for (index = 1; index < argc; index++) {
 266        if (strcmp(argv[index], "--use-vnet") == 0) {
 267            use_vnet = 1;
 268        } else if (strncmp(argv[index], "--br=", 5) == 0) {
 269            bridge = &argv[index][5];
 270        } else if (strncmp(argv[index], "--fd=", 5) == 0) {
 271            unixfd = atoi(&argv[index][5]);
 272        } else {
 273            usage();
 274            return EXIT_FAILURE;
 275        }
 276    }
 277
 278    if (bridge == NULL || unixfd == -1) {
 279        usage();
 280        return EXIT_FAILURE;
 281    }
 282    if (strlen(bridge) >= IFNAMSIZ) {
 283        fprintf(stderr, "name `%s' too long: %zu\n", bridge, strlen(bridge));
 284        return EXIT_FAILURE;
 285    }
 286
 287    /* parse default acl file */
 288    QSIMPLEQ_INIT(&acl_list);
 289    acl_file = get_relocated_path(DEFAULT_ACL_FILE);
 290    if (parse_acl_file(acl_file, &acl_list) == -1) {
 291        fprintf(stderr, "failed to parse default acl file `%s'\n",
 292                acl_file);
 293        ret = EXIT_FAILURE;
 294        goto cleanup;
 295    }
 296
 297    /* validate bridge against acl -- default policy is to deny
 298     * according acl policy if we have a deny and allow both
 299     * then deny should always win over allow
 300     */
 301    access_allowed = 0;
 302    access_denied = 0;
 303    QSIMPLEQ_FOREACH(acl_rule, &acl_list, entry) {
 304        switch (acl_rule->type) {
 305        case ACL_ALLOW_ALL:
 306            access_allowed = 1;
 307            break;
 308        case ACL_ALLOW:
 309            if (strcmp(bridge, acl_rule->iface) == 0) {
 310                access_allowed = 1;
 311            }
 312            break;
 313        case ACL_DENY_ALL:
 314            access_denied = 1;
 315            break;
 316        case ACL_DENY:
 317            if (strcmp(bridge, acl_rule->iface) == 0) {
 318                access_denied = 1;
 319            }
 320            break;
 321        }
 322    }
 323
 324    if ((access_allowed == 0) || (access_denied == 1)) {
 325        fprintf(stderr, "access denied by acl file\n");
 326        ret = EXIT_FAILURE;
 327        goto cleanup;
 328    }
 329
 330    /* open a socket to use to control the network interfaces */
 331    ctlfd = socket(AF_INET, SOCK_STREAM, 0);
 332    if (ctlfd == -1) {
 333        fprintf(stderr, "failed to open control socket: %s\n", strerror(errno));
 334        ret = EXIT_FAILURE;
 335        goto cleanup;
 336    }
 337
 338    /* open the tap device */
 339    fd = open("/dev/net/tun", O_RDWR);
 340    if (fd == -1) {
 341        fprintf(stderr, "failed to open /dev/net/tun: %s\n", strerror(errno));
 342        ret = EXIT_FAILURE;
 343        goto cleanup;
 344    }
 345
 346    /* request a tap device, disable PI, and add vnet header support if
 347     * requested and it's available. */
 348    prep_ifreq(&ifr, "tap%d");
 349    ifr.ifr_flags = IFF_TAP|IFF_NO_PI;
 350    if (use_vnet && has_vnet_hdr(fd)) {
 351        ifr.ifr_flags |= IFF_VNET_HDR;
 352    }
 353
 354    if (ioctl(fd, TUNSETIFF, &ifr) == -1) {
 355        fprintf(stderr, "failed to create tun device: %s\n", strerror(errno));
 356        ret = EXIT_FAILURE;
 357        goto cleanup;
 358    }
 359
 360    /* save tap device name */
 361    snprintf(iface, sizeof(iface), "%s", ifr.ifr_name);
 362
 363    /* get the mtu of the bridge */
 364    prep_ifreq(&ifr, bridge);
 365    if (ioctl(ctlfd, SIOCGIFMTU, &ifr) == -1) {
 366        fprintf(stderr, "failed to get mtu of bridge `%s': %s\n",
 367                bridge, strerror(errno));
 368        ret = EXIT_FAILURE;
 369        goto cleanup;
 370    }
 371
 372    /* save mtu */
 373    mtu = ifr.ifr_mtu;
 374
 375    /* set the mtu of the interface based on the bridge */
 376    prep_ifreq(&ifr, iface);
 377    ifr.ifr_mtu = mtu;
 378    if (ioctl(ctlfd, SIOCSIFMTU, &ifr) == -1) {
 379        fprintf(stderr, "failed to set mtu of device `%s' to %d: %s\n",
 380                iface, mtu, strerror(errno));
 381        ret = EXIT_FAILURE;
 382        goto cleanup;
 383    }
 384
 385    /* Linux uses the lowest enslaved MAC address as the MAC address of
 386     * the bridge.  Set MAC address to a high value so that it doesn't
 387     * affect the MAC address of the bridge.
 388     */
 389    if (ioctl(ctlfd, SIOCGIFHWADDR, &ifr) < 0) {
 390        fprintf(stderr, "failed to get MAC address of device `%s': %s\n",
 391                iface, strerror(errno));
 392        ret = EXIT_FAILURE;
 393        goto cleanup;
 394    }
 395    ifr.ifr_hwaddr.sa_data[0] = 0xFE;
 396    if (ioctl(ctlfd, SIOCSIFHWADDR, &ifr) < 0) {
 397        fprintf(stderr, "failed to set MAC address of device `%s': %s\n",
 398                iface, strerror(errno));
 399        ret = EXIT_FAILURE;
 400        goto cleanup;
 401    }
 402
 403    /* add the interface to the bridge */
 404    prep_ifreq(&ifr, bridge);
 405    ifindex = if_nametoindex(iface);
 406#ifndef SIOCBRADDIF
 407    ifargs[0] = BRCTL_ADD_IF;
 408    ifargs[1] = ifindex;
 409    ifargs[2] = 0;
 410    ifargs[3] = 0;
 411    ifr.ifr_data = (void *)ifargs;
 412    ret = ioctl(ctlfd, SIOCDEVPRIVATE, &ifr);
 413#else
 414    ifr.ifr_ifindex = ifindex;
 415    ret = ioctl(ctlfd, SIOCBRADDIF, &ifr);
 416#endif
 417    if (ret == -1) {
 418        fprintf(stderr, "failed to add interface `%s' to bridge `%s': %s\n",
 419                iface, bridge, strerror(errno));
 420        ret = EXIT_FAILURE;
 421        goto cleanup;
 422    }
 423
 424    /* bring the interface up */
 425    prep_ifreq(&ifr, iface);
 426    if (ioctl(ctlfd, SIOCGIFFLAGS, &ifr) == -1) {
 427        fprintf(stderr, "failed to get interface flags for `%s': %s\n",
 428                iface, strerror(errno));
 429        ret = EXIT_FAILURE;
 430        goto cleanup;
 431    }
 432
 433    ifr.ifr_flags |= IFF_UP;
 434    if (ioctl(ctlfd, SIOCSIFFLAGS, &ifr) == -1) {
 435        fprintf(stderr, "failed to bring up interface `%s': %s\n",
 436                iface, strerror(errno));
 437        ret = EXIT_FAILURE;
 438        goto cleanup;
 439    }
 440
 441    /* write fd to the domain socket */
 442    if (send_fd(unixfd, fd) == -1) {
 443        fprintf(stderr, "failed to write fd to unix socket: %s\n",
 444                strerror(errno));
 445        ret = EXIT_FAILURE;
 446        goto cleanup;
 447    }
 448
 449    /* ... */
 450
 451    /* profit! */
 452
 453cleanup:
 454    if (fd >= 0) {
 455        close(fd);
 456    }
 457    if (ctlfd >= 0) {
 458        close(ctlfd);
 459    }
 460    while ((acl_rule = QSIMPLEQ_FIRST(&acl_list)) != NULL) {
 461        QSIMPLEQ_REMOVE_HEAD(&acl_list, entry);
 462        g_free(acl_rule);
 463    }
 464
 465    return ret;
 466}
 467