iproute2/ip/ipvrf.c
<<
>>
Prefs
   1/*
   2 * ipvrf.c      "ip vrf"
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     David Ahern <dsa@cumulusnetworks.com>
  10 *
  11 */
  12
  13#include <sys/types.h>
  14#include <sys/stat.h>
  15#include <sys/socket.h>
  16#include <sys/mount.h>
  17#include <linux/bpf.h>
  18#include <linux/if.h>
  19#include <fcntl.h>
  20#include <stdio.h>
  21#include <stdlib.h>
  22#include <unistd.h>
  23#include <string.h>
  24#include <dirent.h>
  25#include <errno.h>
  26#include <limits.h>
  27
  28#include "rt_names.h"
  29#include "utils.h"
  30#include "ip_common.h"
  31#include "bpf_util.h"
  32
  33#define CGRP_PROC_FILE  "/cgroup.procs"
  34
  35static struct link_filter vrf_filter;
  36
  37static void usage(void)
  38{
  39        fprintf(stderr,
  40                "Usage: ip vrf show [NAME] ...\n"
  41                "       ip vrf exec [NAME] cmd ...\n"
  42                "       ip vrf identify [PID]\n"
  43                "       ip vrf pids [NAME]\n");
  44
  45        exit(-1);
  46}
  47
  48/*
  49 * parse process based cgroup file looking for PATH/vrf/NAME where
  50 * NAME is the name of the vrf the process is associated with
  51 */
  52static int vrf_identify(pid_t pid, char *name, size_t len)
  53{
  54        char path[PATH_MAX];
  55        char buf[4096];
  56        char *vrf, *end;
  57        FILE *fp;
  58
  59        snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
  60        fp = fopen(path, "r");
  61        if (!fp)
  62                return -1;
  63
  64        memset(name, 0, len);
  65
  66        while (fgets(buf, sizeof(buf), fp)) {
  67                /* want the controller-less cgroup */
  68                if (strstr(buf, "::/") == NULL)
  69                        continue;
  70
  71                vrf = strstr(buf, "/vrf/");
  72                if (vrf) {
  73                        vrf += 5;  /* skip past "/vrf/" */
  74                        end = strchr(vrf, '\n');
  75                        if (end)
  76                                *end = '\0';
  77
  78                        strlcpy(name, vrf, len);
  79                        break;
  80                }
  81        }
  82
  83        fclose(fp);
  84
  85        return 0;
  86}
  87
  88static int ipvrf_identify(int argc, char **argv)
  89{
  90        char vrf[32];
  91        int rc;
  92        unsigned int pid;
  93
  94        if (argc < 1)
  95                pid = getpid();
  96        else if (argc > 1)
  97                invarg("Extra arguments specified\n", argv[1]);
  98        else if (get_unsigned(&pid, argv[0], 10))
  99                invarg("Invalid pid\n", argv[0]);
 100
 101        rc = vrf_identify(pid, vrf, sizeof(vrf));
 102        if (!rc) {
 103                if (vrf[0] != '\0')
 104                        printf("%s\n", vrf);
 105        } else {
 106                fprintf(stderr, "Failed to lookup vrf association: %s\n",
 107                        strerror(errno));
 108        }
 109
 110        return rc;
 111}
 112
 113/* read PATH/vrf/NAME/cgroup.procs file */
 114static void read_cgroup_pids(const char *base_path, char *name)
 115{
 116        char path[PATH_MAX];
 117        char buf[4096];
 118        FILE *fp;
 119
 120        if (snprintf(path, sizeof(path), "%s/vrf/%s%s",
 121                     base_path, name, CGRP_PROC_FILE) >= sizeof(path))
 122                return;
 123
 124        fp = fopen(path, "r");
 125        if (!fp)
 126                return; /* no cgroup file, nothing to show */
 127
 128        /* dump contents (pids) of cgroup.procs */
 129        while (fgets(buf, sizeof(buf), fp)) {
 130                char *nl, comm[32];
 131
 132                nl = strchr(buf, '\n');
 133                if (nl)
 134                        *nl = '\0';
 135
 136                if (get_command_name(buf, comm, sizeof(comm)))
 137                        strcpy(comm, "<terminated?>");
 138
 139                printf("%5s  %s\n", buf, comm);
 140        }
 141
 142        fclose(fp);
 143}
 144
 145/* recurse path looking for PATH[/NETNS]/vrf/NAME */
 146static int recurse_dir(char *base_path, char *name, const char *netns)
 147{
 148        char path[PATH_MAX];
 149        struct dirent *de;
 150        struct stat fstat;
 151        int rc;
 152        DIR *d;
 153
 154        d = opendir(base_path);
 155        if (!d)
 156                return -1;
 157
 158        while ((de = readdir(d)) != NULL) {
 159                if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
 160                        continue;
 161
 162                if (!strcmp(de->d_name, "vrf")) {
 163                        const char *pdir = strrchr(base_path, '/');
 164
 165                        /* found a 'vrf' directory. if it is for the given
 166                         * namespace then dump the cgroup pids
 167                         */
 168                        if (*netns == '\0' ||
 169                            (pdir && !strcmp(pdir+1, netns)))
 170                                read_cgroup_pids(base_path, name);
 171
 172                        continue;
 173                }
 174
 175                /* is this a subdir that needs to be walked */
 176                if (snprintf(path, sizeof(path), "%s/%s",
 177                             base_path, de->d_name) >= sizeof(path))
 178                        continue;
 179
 180                if (lstat(path, &fstat) < 0)
 181                        continue;
 182
 183                if (S_ISDIR(fstat.st_mode)) {
 184                        rc = recurse_dir(path, name, netns);
 185                        if (rc != 0)
 186                                goto out;
 187                }
 188        }
 189
 190        rc = 0;
 191out:
 192        closedir(d);
 193
 194        return rc;
 195}
 196
 197static int ipvrf_get_netns(char *netns, int len)
 198{
 199        if (netns_identify_pid("self", netns, len-3)) {
 200                fprintf(stderr, "Failed to get name of network namespace: %s\n",
 201                        strerror(errno));
 202                return -1;
 203        }
 204
 205        if (*netns != '\0')
 206                strcat(netns, "-ns");
 207
 208        return 0;
 209}
 210
 211static int ipvrf_pids(int argc, char **argv)
 212{
 213        char *mnt, *vrf;
 214        char netns[256];
 215        int ret = -1;
 216
 217        if (argc != 1) {
 218                fprintf(stderr, "Invalid arguments\n");
 219                return -1;
 220        }
 221
 222        vrf = argv[0];
 223        if (!name_is_vrf(vrf)) {
 224                fprintf(stderr, "Invalid VRF name\n");
 225                return -1;
 226        }
 227
 228        mnt = find_cgroup2_mount(true);
 229        if (!mnt)
 230                return -1;
 231
 232        if (ipvrf_get_netns(netns, sizeof(netns)) < 0)
 233                goto out;
 234
 235        ret = recurse_dir(mnt, vrf, netns);
 236
 237out:
 238        free(mnt);
 239
 240        return ret;
 241}
 242
 243/* load BPF program to set sk_bound_dev_if for sockets */
 244static char bpf_log_buf[256*1024];
 245
 246static int prog_load(int idx)
 247{
 248        struct bpf_insn prog[] = {
 249                BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
 250                BPF_MOV64_IMM(BPF_REG_3, idx),
 251                BPF_MOV64_IMM(BPF_REG_2,
 252                              offsetof(struct bpf_sock, bound_dev_if)),
 253                BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
 254                            offsetof(struct bpf_sock, bound_dev_if)),
 255                BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
 256                BPF_EXIT_INSN(),
 257        };
 258
 259        return bpf_program_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
 260                                "GPL", bpf_log_buf, sizeof(bpf_log_buf));
 261}
 262
 263static int vrf_configure_cgroup(const char *path, int ifindex)
 264{
 265        int rc = -1, cg_fd, prog_fd = -1;
 266
 267        cg_fd = open(path, O_DIRECTORY | O_RDONLY);
 268        if (cg_fd < 0) {
 269                fprintf(stderr,
 270                        "Failed to open cgroup path: '%s'\n",
 271                        strerror(errno));
 272                goto out;
 273        }
 274
 275        /*
 276         * Load bpf program into kernel and attach to cgroup to affect
 277         * socket creates
 278         */
 279        prog_fd = prog_load(ifindex);
 280        if (prog_fd < 0) {
 281                fprintf(stderr, "Failed to load BPF prog: '%s'\n%s",
 282                        strerror(errno), bpf_log_buf);
 283
 284                if (errno != EPERM) {
 285                        fprintf(stderr,
 286                                "Kernel compiled with CGROUP_BPF enabled?\n");
 287                }
 288                goto out;
 289        }
 290
 291        if (bpf_program_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) {
 292                fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n",
 293                        strerror(errno));
 294                goto out;
 295        }
 296
 297        rc = 0;
 298out:
 299        close(cg_fd);
 300        close(prog_fd);
 301
 302        return rc;
 303}
 304
 305/* get base path for controller-less cgroup for a process.
 306 * path returned does not include /vrf/NAME if it exists
 307 */
 308static int vrf_path(char *vpath, size_t len)
 309{
 310        char path[PATH_MAX];
 311        char buf[4096];
 312        char *vrf;
 313        FILE *fp;
 314
 315        snprintf(path, sizeof(path), "/proc/%d/cgroup", getpid());
 316        fp = fopen(path, "r");
 317        if (!fp)
 318                return -1;
 319
 320        vpath[0] = '\0';
 321
 322        while (fgets(buf, sizeof(buf), fp)) {
 323                char *start, *nl;
 324
 325                start = strstr(buf, "::/");
 326                if (!start)
 327                        continue;
 328
 329                /* advance past '::' */
 330                start += 2;
 331
 332                nl = strchr(start, '\n');
 333                if (nl)
 334                        *nl = '\0';
 335
 336                vrf = strstr(start, "/vrf");
 337                if (vrf)
 338                        *vrf = '\0';
 339
 340                strlcpy(vpath, start, len);
 341
 342                /* if vrf path is just / then return nothing */
 343                if (!strcmp(vpath, "/"))
 344                        vpath[0] = '\0';
 345
 346                break;
 347        }
 348
 349        fclose(fp);
 350
 351        return 0;
 352}
 353
 354static int vrf_switch(const char *name)
 355{
 356        char path[PATH_MAX], *mnt, pid[16];
 357        char vpath[PATH_MAX], netns[256];
 358        int ifindex = 0;
 359        int rc = -1, len, fd = -1;
 360
 361        if (strcmp(name, "default")) {
 362                ifindex = name_is_vrf(name);
 363                if (!ifindex) {
 364                        fprintf(stderr, "Invalid VRF name\n");
 365                        return -1;
 366                }
 367        }
 368
 369        mnt = find_cgroup2_mount(true);
 370        if (!mnt)
 371                return -1;
 372
 373        /* -1 on length to add '/' to the end */
 374        if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0)
 375                goto out;
 376
 377        if (vrf_path(vpath, sizeof(vpath)) < 0) {
 378                fprintf(stderr, "Failed to get base cgroup path: %s\n",
 379                        strerror(errno));
 380                goto out;
 381        }
 382
 383        /* if path already ends in netns then don't add it again */
 384        if (*netns != '\0') {
 385                char *pdir = strrchr(vpath, '/');
 386
 387                if (!pdir)
 388                        pdir = vpath;
 389                else
 390                        pdir++;
 391
 392                if (strcmp(pdir, netns) == 0)
 393                        *pdir = '\0';
 394
 395                strcat(netns, "/");
 396        }
 397
 398        /* path to cgroup; make sure buffer has room to cat "/cgroup.procs"
 399         * to the end of the path
 400         */
 401        len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE),
 402                       "%s%s/%svrf/%s",
 403                       mnt, vpath, netns, ifindex ? name : "");
 404        if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) {
 405                fprintf(stderr, "Invalid path to cgroup2 mount\n");
 406                goto out;
 407        }
 408
 409        if (make_path(path, 0755)) {
 410                fprintf(stderr, "Failed to setup vrf cgroup2 directory\n");
 411                goto out;
 412        }
 413
 414        if (ifindex && vrf_configure_cgroup(path, ifindex))
 415                goto out;
 416
 417        /*
 418         * write pid to cgroup.procs making process part of cgroup
 419         */
 420        strcat(path, CGRP_PROC_FILE);
 421        fd = open(path, O_RDWR | O_APPEND);
 422        if (fd < 0) {
 423                fprintf(stderr, "Failed to open cgroups.procs file: %s.\n",
 424                        strerror(errno));
 425                goto out;
 426        }
 427
 428        snprintf(pid, sizeof(pid), "%d", getpid());
 429        if (write(fd, pid, strlen(pid)) < 0) {
 430                fprintf(stderr, "Failed to join cgroup\n");
 431                goto out2;
 432        }
 433
 434        rc = 0;
 435out2:
 436        close(fd);
 437out:
 438        free(mnt);
 439
 440        drop_cap();
 441
 442        return rc;
 443}
 444
 445static int do_switch(void *arg)
 446{
 447        char *vrf = arg;
 448
 449        return vrf_switch(vrf);
 450}
 451
 452static int ipvrf_exec(int argc, char **argv)
 453{
 454        if (argc < 1) {
 455                fprintf(stderr, "No VRF name specified\n");
 456                return -1;
 457        }
 458        if (argc < 2) {
 459                fprintf(stderr, "No command specified\n");
 460                return -1;
 461        }
 462
 463        return -cmd_exec(argv[1], argv + 1, !!batch_mode, do_switch, argv[0]);
 464}
 465
 466/* reset VRF association of current process to default VRF;
 467 * used by netns_exec
 468 */
 469void vrf_reset(void)
 470{
 471        char vrf[32];
 472
 473        if (vrf_identify(getpid(), vrf, sizeof(vrf)) ||
 474            (vrf[0] == '\0'))
 475                return;
 476
 477        vrf_switch("default");
 478}
 479
 480static int ipvrf_filter_req(struct nlmsghdr *nlh, int reqlen)
 481{
 482        struct rtattr *linkinfo;
 483        int err;
 484
 485        if (vrf_filter.kind) {
 486                linkinfo = addattr_nest(nlh, reqlen, IFLA_LINKINFO);
 487
 488                err = addattr_l(nlh, reqlen, IFLA_INFO_KIND, vrf_filter.kind,
 489                                strlen(vrf_filter.kind));
 490                if (err)
 491                        return err;
 492
 493                addattr_nest_end(nlh, linkinfo);
 494        }
 495
 496        return 0;
 497}
 498
 499/* input arg is linkinfo */
 500static __u32 vrf_table_linkinfo(struct rtattr *li[])
 501{
 502        struct rtattr *attr[IFLA_VRF_MAX + 1];
 503
 504        if (li[IFLA_INFO_DATA]) {
 505                parse_rtattr_nested(attr, IFLA_VRF_MAX, li[IFLA_INFO_DATA]);
 506
 507                if (attr[IFLA_VRF_TABLE])
 508                        return rta_getattr_u32(attr[IFLA_VRF_TABLE]);
 509        }
 510
 511        return 0;
 512}
 513
 514static int ipvrf_print(struct nlmsghdr *n)
 515{
 516        struct ifinfomsg *ifi = NLMSG_DATA(n);
 517        struct rtattr *tb[IFLA_MAX+1];
 518        struct rtattr *li[IFLA_INFO_MAX+1];
 519        int len = n->nlmsg_len;
 520        const char *name;
 521        __u32 tb_id;
 522
 523        len -= NLMSG_LENGTH(sizeof(*ifi));
 524        if (len < 0)
 525                return 0;
 526
 527        if (vrf_filter.ifindex && vrf_filter.ifindex != ifi->ifi_index)
 528                return 0;
 529
 530        parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
 531
 532        /* kernel does not support filter by master device */
 533        if (tb[IFLA_MASTER]) {
 534                int master = *(int *)RTA_DATA(tb[IFLA_MASTER]);
 535
 536                if (vrf_filter.master && master != vrf_filter.master)
 537                        return 0;
 538        }
 539
 540        if (!tb[IFLA_IFNAME]) {
 541                fprintf(stderr,
 542                        "BUG: device with ifindex %d has nil ifname\n",
 543                        ifi->ifi_index);
 544                return 0;
 545        }
 546        name = rta_getattr_str(tb[IFLA_IFNAME]);
 547
 548        /* missing LINKINFO means not VRF. e.g., kernel does not
 549         * support filtering on kind, so userspace needs to handle
 550         */
 551        if (!tb[IFLA_LINKINFO])
 552                return 0;
 553
 554        parse_rtattr_nested(li, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);
 555
 556        if (!li[IFLA_INFO_KIND])
 557                return 0;
 558
 559        if (strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf"))
 560                return 0;
 561
 562        tb_id = vrf_table_linkinfo(li);
 563        if (!tb_id) {
 564                fprintf(stderr,
 565                        "BUG: VRF %s is missing table id\n", name);
 566                return 0;
 567        }
 568
 569        open_json_object(NULL);
 570        print_string(PRINT_ANY, "name", "%-16s", name);
 571        print_uint(PRINT_ANY, "table", " %5u", tb_id);
 572        print_string(PRINT_FP, NULL, "%s", "\n");
 573        close_json_object();
 574
 575        return 1;
 576}
 577
 578static int ipvrf_show(int argc, char **argv)
 579{
 580        struct nlmsg_chain linfo = { NULL, NULL};
 581        int rc = 0;
 582
 583        vrf_filter.kind = "vrf";
 584
 585        if (argc > 1)
 586                usage();
 587
 588        if (argc == 1) {
 589                __u32 tb_id;
 590
 591                tb_id = ipvrf_get_table(argv[0]);
 592                if (!tb_id) {
 593                        fprintf(stderr, "Invalid VRF\n");
 594                        return 1;
 595                }
 596                printf("%s %u\n", argv[0], tb_id);
 597                return 0;
 598        }
 599
 600        if (ip_link_list(ipvrf_filter_req, &linfo) == 0) {
 601                struct nlmsg_list *l;
 602                unsigned nvrf = 0;
 603
 604                new_json_obj(json);
 605
 606                print_string(PRINT_FP, NULL, "%-16s", "Name");
 607                print_string(PRINT_FP, NULL, "  %5s\n", "Table");
 608                print_string(PRINT_FP, NULL, "%s\n",
 609                             "-----------------------");
 610
 611                for (l = linfo.head; l; l = l->next)
 612                        nvrf += ipvrf_print(&l->h);
 613
 614                if (!nvrf)
 615                        print_string(PRINT_FP, NULL, "%s\n",
 616                                     "No VRF has been configured");
 617                delete_json_obj();
 618        } else
 619                rc = 1;
 620
 621        free_nlmsg_chain(&linfo);
 622
 623        return rc;
 624}
 625
 626int do_ipvrf(int argc, char **argv)
 627{
 628        if (argc == 0)
 629                return ipvrf_show(0, NULL);
 630
 631        if (matches(*argv, "identify") == 0)
 632                return ipvrf_identify(argc-1, argv+1);
 633
 634        if (matches(*argv, "pids") == 0)
 635                return ipvrf_pids(argc-1, argv+1);
 636
 637        if (matches(*argv, "exec") == 0)
 638                return ipvrf_exec(argc-1, argv+1);
 639
 640        if (matches(*argv, "show") == 0 ||
 641            matches(*argv, "lst") == 0 ||
 642            matches(*argv, "list") == 0)
 643                return ipvrf_show(argc-1, argv+1);
 644
 645        if (matches(*argv, "help") == 0)
 646                usage();
 647
 648        fprintf(stderr, "Command \"%s\" is unknown, try \"ip vrf help\".\n",
 649                *argv);
 650
 651        exit(-1);
 652}
 653