linux/kernel/bpf/bpf_iter.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2020 Facebook */
   3
   4#include <linux/fs.h>
   5#include <linux/anon_inodes.h>
   6#include <linux/filter.h>
   7#include <linux/bpf.h>
   8
   9struct bpf_iter_target_info {
  10        struct list_head list;
  11        const struct bpf_iter_reg *reg_info;
  12        u32 btf_id;     /* cached value */
  13};
  14
  15struct bpf_iter_link {
  16        struct bpf_link link;
  17        struct bpf_iter_aux_info aux;
  18        struct bpf_iter_target_info *tinfo;
  19};
  20
  21struct bpf_iter_priv_data {
  22        struct bpf_iter_target_info *tinfo;
  23        const struct bpf_iter_seq_info *seq_info;
  24        struct bpf_prog *prog;
  25        u64 session_id;
  26        u64 seq_num;
  27        bool done_stop;
  28        u8 target_private[] __aligned(8);
  29};
  30
  31static struct list_head targets = LIST_HEAD_INIT(targets);
  32static DEFINE_MUTEX(targets_mutex);
  33
  34/* protect bpf_iter_link changes */
  35static DEFINE_MUTEX(link_mutex);
  36
  37/* incremented on every opened seq_file */
  38static atomic64_t session_id;
  39
  40static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
  41                            const struct bpf_iter_seq_info *seq_info);
  42
  43static void bpf_iter_inc_seq_num(struct seq_file *seq)
  44{
  45        struct bpf_iter_priv_data *iter_priv;
  46
  47        iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
  48                                 target_private);
  49        iter_priv->seq_num++;
  50}
  51
  52static void bpf_iter_dec_seq_num(struct seq_file *seq)
  53{
  54        struct bpf_iter_priv_data *iter_priv;
  55
  56        iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
  57                                 target_private);
  58        iter_priv->seq_num--;
  59}
  60
  61static void bpf_iter_done_stop(struct seq_file *seq)
  62{
  63        struct bpf_iter_priv_data *iter_priv;
  64
  65        iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
  66                                 target_private);
  67        iter_priv->done_stop = true;
  68}
  69
  70/* maximum visited objects before bailing out */
  71#define MAX_ITER_OBJECTS        1000000
  72
  73/* bpf_seq_read, a customized and simpler version for bpf iterator.
  74 * no_llseek is assumed for this file.
  75 * The following are differences from seq_read():
  76 *  . fixed buffer size (PAGE_SIZE)
  77 *  . assuming no_llseek
  78 *  . stop() may call bpf program, handling potential overflow there
  79 */
  80static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
  81                            loff_t *ppos)
  82{
  83        struct seq_file *seq = file->private_data;
  84        size_t n, offs, copied = 0;
  85        int err = 0, num_objs = 0;
  86        void *p;
  87
  88        mutex_lock(&seq->lock);
  89
  90        if (!seq->buf) {
  91                seq->size = PAGE_SIZE << 3;
  92                seq->buf = kvmalloc(seq->size, GFP_KERNEL);
  93                if (!seq->buf) {
  94                        err = -ENOMEM;
  95                        goto done;
  96                }
  97        }
  98
  99        if (seq->count) {
 100                n = min(seq->count, size);
 101                err = copy_to_user(buf, seq->buf + seq->from, n);
 102                if (err) {
 103                        err = -EFAULT;
 104                        goto done;
 105                }
 106                seq->count -= n;
 107                seq->from += n;
 108                copied = n;
 109                goto done;
 110        }
 111
 112        seq->from = 0;
 113        p = seq->op->start(seq, &seq->index);
 114        if (!p)
 115                goto stop;
 116        if (IS_ERR(p)) {
 117                err = PTR_ERR(p);
 118                seq->op->stop(seq, p);
 119                seq->count = 0;
 120                goto done;
 121        }
 122
 123        err = seq->op->show(seq, p);
 124        if (err > 0) {
 125                /* object is skipped, decrease seq_num, so next
 126                 * valid object can reuse the same seq_num.
 127                 */
 128                bpf_iter_dec_seq_num(seq);
 129                seq->count = 0;
 130        } else if (err < 0 || seq_has_overflowed(seq)) {
 131                if (!err)
 132                        err = -E2BIG;
 133                seq->op->stop(seq, p);
 134                seq->count = 0;
 135                goto done;
 136        }
 137
 138        while (1) {
 139                loff_t pos = seq->index;
 140
 141                num_objs++;
 142                offs = seq->count;
 143                p = seq->op->next(seq, p, &seq->index);
 144                if (pos == seq->index) {
 145                        pr_info_ratelimited("buggy seq_file .next function %ps "
 146                                "did not updated position index\n",
 147                                seq->op->next);
 148                        seq->index++;
 149                }
 150
 151                if (IS_ERR_OR_NULL(p))
 152                        break;
 153
 154                /* got a valid next object, increase seq_num */
 155                bpf_iter_inc_seq_num(seq);
 156
 157                if (seq->count >= size)
 158                        break;
 159
 160                if (num_objs >= MAX_ITER_OBJECTS) {
 161                        if (offs == 0) {
 162                                err = -EAGAIN;
 163                                seq->op->stop(seq, p);
 164                                goto done;
 165                        }
 166                        break;
 167                }
 168
 169                err = seq->op->show(seq, p);
 170                if (err > 0) {
 171                        bpf_iter_dec_seq_num(seq);
 172                        seq->count = offs;
 173                } else if (err < 0 || seq_has_overflowed(seq)) {
 174                        seq->count = offs;
 175                        if (offs == 0) {
 176                                if (!err)
 177                                        err = -E2BIG;
 178                                seq->op->stop(seq, p);
 179                                goto done;
 180                        }
 181                        break;
 182                }
 183        }
 184stop:
 185        offs = seq->count;
 186        /* bpf program called if !p */
 187        seq->op->stop(seq, p);
 188        if (!p) {
 189                if (!seq_has_overflowed(seq)) {
 190                        bpf_iter_done_stop(seq);
 191                } else {
 192                        seq->count = offs;
 193                        if (offs == 0) {
 194                                err = -E2BIG;
 195                                goto done;
 196                        }
 197                }
 198        }
 199
 200        n = min(seq->count, size);
 201        err = copy_to_user(buf, seq->buf, n);
 202        if (err) {
 203                err = -EFAULT;
 204                goto done;
 205        }
 206        copied = n;
 207        seq->count -= n;
 208        seq->from = n;
 209done:
 210        if (!copied)
 211                copied = err;
 212        else
 213                *ppos += copied;
 214        mutex_unlock(&seq->lock);
 215        return copied;
 216}
 217
 218static const struct bpf_iter_seq_info *
 219__get_seq_info(struct bpf_iter_link *link)
 220{
 221        const struct bpf_iter_seq_info *seq_info;
 222
 223        if (link->aux.map) {
 224                seq_info = link->aux.map->ops->iter_seq_info;
 225                if (seq_info)
 226                        return seq_info;
 227        }
 228
 229        return link->tinfo->reg_info->seq_info;
 230}
 231
 232static int iter_open(struct inode *inode, struct file *file)
 233{
 234        struct bpf_iter_link *link = inode->i_private;
 235
 236        return prepare_seq_file(file, link, __get_seq_info(link));
 237}
 238
 239static int iter_release(struct inode *inode, struct file *file)
 240{
 241        struct bpf_iter_priv_data *iter_priv;
 242        struct seq_file *seq;
 243
 244        seq = file->private_data;
 245        if (!seq)
 246                return 0;
 247
 248        iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
 249                                 target_private);
 250
 251        if (iter_priv->seq_info->fini_seq_private)
 252                iter_priv->seq_info->fini_seq_private(seq->private);
 253
 254        bpf_prog_put(iter_priv->prog);
 255        seq->private = iter_priv;
 256
 257        return seq_release_private(inode, file);
 258}
 259
 260const struct file_operations bpf_iter_fops = {
 261        .open           = iter_open,
 262        .llseek         = no_llseek,
 263        .read           = bpf_seq_read,
 264        .release        = iter_release,
 265};
 266
 267/* The argument reg_info will be cached in bpf_iter_target_info.
 268 * The common practice is to declare target reg_info as
 269 * a const static variable and passed as an argument to
 270 * bpf_iter_reg_target().
 271 */
 272int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
 273{
 274        struct bpf_iter_target_info *tinfo;
 275
 276        tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
 277        if (!tinfo)
 278                return -ENOMEM;
 279
 280        tinfo->reg_info = reg_info;
 281        INIT_LIST_HEAD(&tinfo->list);
 282
 283        mutex_lock(&targets_mutex);
 284        list_add(&tinfo->list, &targets);
 285        mutex_unlock(&targets_mutex);
 286
 287        return 0;
 288}
 289
 290void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
 291{
 292        struct bpf_iter_target_info *tinfo;
 293        bool found = false;
 294
 295        mutex_lock(&targets_mutex);
 296        list_for_each_entry(tinfo, &targets, list) {
 297                if (reg_info == tinfo->reg_info) {
 298                        list_del(&tinfo->list);
 299                        kfree(tinfo);
 300                        found = true;
 301                        break;
 302                }
 303        }
 304        mutex_unlock(&targets_mutex);
 305
 306        WARN_ON(found == false);
 307}
 308
 309static void cache_btf_id(struct bpf_iter_target_info *tinfo,
 310                         struct bpf_prog *prog)
 311{
 312        tinfo->btf_id = prog->aux->attach_btf_id;
 313}
 314
 315bool bpf_iter_prog_supported(struct bpf_prog *prog)
 316{
 317        const char *attach_fname = prog->aux->attach_func_name;
 318        u32 prog_btf_id = prog->aux->attach_btf_id;
 319        const char *prefix = BPF_ITER_FUNC_PREFIX;
 320        struct bpf_iter_target_info *tinfo;
 321        int prefix_len = strlen(prefix);
 322        bool supported = false;
 323
 324        if (strncmp(attach_fname, prefix, prefix_len))
 325                return false;
 326
 327        mutex_lock(&targets_mutex);
 328        list_for_each_entry(tinfo, &targets, list) {
 329                if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
 330                        supported = true;
 331                        break;
 332                }
 333                if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
 334                        cache_btf_id(tinfo, prog);
 335                        supported = true;
 336                        break;
 337                }
 338        }
 339        mutex_unlock(&targets_mutex);
 340
 341        if (supported) {
 342                prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
 343                prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
 344        }
 345
 346        return supported;
 347}
 348
 349static void bpf_iter_link_release(struct bpf_link *link)
 350{
 351        struct bpf_iter_link *iter_link =
 352                container_of(link, struct bpf_iter_link, link);
 353
 354        if (iter_link->tinfo->reg_info->detach_target)
 355                iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
 356}
 357
 358static void bpf_iter_link_dealloc(struct bpf_link *link)
 359{
 360        struct bpf_iter_link *iter_link =
 361                container_of(link, struct bpf_iter_link, link);
 362
 363        kfree(iter_link);
 364}
 365
 366static int bpf_iter_link_replace(struct bpf_link *link,
 367                                 struct bpf_prog *new_prog,
 368                                 struct bpf_prog *old_prog)
 369{
 370        int ret = 0;
 371
 372        mutex_lock(&link_mutex);
 373        if (old_prog && link->prog != old_prog) {
 374                ret = -EPERM;
 375                goto out_unlock;
 376        }
 377
 378        if (link->prog->type != new_prog->type ||
 379            link->prog->expected_attach_type != new_prog->expected_attach_type ||
 380            link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
 381                ret = -EINVAL;
 382                goto out_unlock;
 383        }
 384
 385        old_prog = xchg(&link->prog, new_prog);
 386        bpf_prog_put(old_prog);
 387
 388out_unlock:
 389        mutex_unlock(&link_mutex);
 390        return ret;
 391}
 392
 393static void bpf_iter_link_show_fdinfo(const struct bpf_link *link,
 394                                      struct seq_file *seq)
 395{
 396        struct bpf_iter_link *iter_link =
 397                container_of(link, struct bpf_iter_link, link);
 398        bpf_iter_show_fdinfo_t show_fdinfo;
 399
 400        seq_printf(seq,
 401                   "target_name:\t%s\n",
 402                   iter_link->tinfo->reg_info->target);
 403
 404        show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo;
 405        if (show_fdinfo)
 406                show_fdinfo(&iter_link->aux, seq);
 407}
 408
 409static int bpf_iter_link_fill_link_info(const struct bpf_link *link,
 410                                        struct bpf_link_info *info)
 411{
 412        struct bpf_iter_link *iter_link =
 413                container_of(link, struct bpf_iter_link, link);
 414        char __user *ubuf = u64_to_user_ptr(info->iter.target_name);
 415        bpf_iter_fill_link_info_t fill_link_info;
 416        u32 ulen = info->iter.target_name_len;
 417        const char *target_name;
 418        u32 target_len;
 419
 420        if (!ulen ^ !ubuf)
 421                return -EINVAL;
 422
 423        target_name = iter_link->tinfo->reg_info->target;
 424        target_len =  strlen(target_name);
 425        info->iter.target_name_len = target_len + 1;
 426
 427        if (ubuf) {
 428                if (ulen >= target_len + 1) {
 429                        if (copy_to_user(ubuf, target_name, target_len + 1))
 430                                return -EFAULT;
 431                } else {
 432                        char zero = '\0';
 433
 434                        if (copy_to_user(ubuf, target_name, ulen - 1))
 435                                return -EFAULT;
 436                        if (put_user(zero, ubuf + ulen - 1))
 437                                return -EFAULT;
 438                        return -ENOSPC;
 439                }
 440        }
 441
 442        fill_link_info = iter_link->tinfo->reg_info->fill_link_info;
 443        if (fill_link_info)
 444                return fill_link_info(&iter_link->aux, info);
 445
 446        return 0;
 447}
 448
 449static const struct bpf_link_ops bpf_iter_link_lops = {
 450        .release = bpf_iter_link_release,
 451        .dealloc = bpf_iter_link_dealloc,
 452        .update_prog = bpf_iter_link_replace,
 453        .show_fdinfo = bpf_iter_link_show_fdinfo,
 454        .fill_link_info = bpf_iter_link_fill_link_info,
 455};
 456
 457bool bpf_link_is_iter(struct bpf_link *link)
 458{
 459        return link->ops == &bpf_iter_link_lops;
 460}
 461
 462int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 463{
 464        union bpf_iter_link_info __user *ulinfo;
 465        struct bpf_link_primer link_primer;
 466        struct bpf_iter_target_info *tinfo;
 467        union bpf_iter_link_info linfo;
 468        struct bpf_iter_link *link;
 469        u32 prog_btf_id, linfo_len;
 470        bool existed = false;
 471        int err;
 472
 473        if (attr->link_create.target_fd || attr->link_create.flags)
 474                return -EINVAL;
 475
 476        memset(&linfo, 0, sizeof(union bpf_iter_link_info));
 477
 478        ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
 479        linfo_len = attr->link_create.iter_info_len;
 480        if (!ulinfo ^ !linfo_len)
 481                return -EINVAL;
 482
 483        if (ulinfo) {
 484                err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
 485                                               linfo_len);
 486                if (err)
 487                        return err;
 488                linfo_len = min_t(u32, linfo_len, sizeof(linfo));
 489                if (copy_from_user(&linfo, ulinfo, linfo_len))
 490                        return -EFAULT;
 491        }
 492
 493        prog_btf_id = prog->aux->attach_btf_id;
 494        mutex_lock(&targets_mutex);
 495        list_for_each_entry(tinfo, &targets, list) {
 496                if (tinfo->btf_id == prog_btf_id) {
 497                        existed = true;
 498                        break;
 499                }
 500        }
 501        mutex_unlock(&targets_mutex);
 502        if (!existed)
 503                return -ENOENT;
 504
 505        link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
 506        if (!link)
 507                return -ENOMEM;
 508
 509        bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
 510        link->tinfo = tinfo;
 511
 512        err  = bpf_link_prime(&link->link, &link_primer);
 513        if (err) {
 514                kfree(link);
 515                return err;
 516        }
 517
 518        if (tinfo->reg_info->attach_target) {
 519                err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
 520                if (err) {
 521                        bpf_link_cleanup(&link_primer);
 522                        return err;
 523                }
 524        }
 525
 526        return bpf_link_settle(&link_primer);
 527}
 528
 529static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
 530                          struct bpf_iter_target_info *tinfo,
 531                          const struct bpf_iter_seq_info *seq_info,
 532                          struct bpf_prog *prog)
 533{
 534        priv_data->tinfo = tinfo;
 535        priv_data->seq_info = seq_info;
 536        priv_data->prog = prog;
 537        priv_data->session_id = atomic64_inc_return(&session_id);
 538        priv_data->seq_num = 0;
 539        priv_data->done_stop = false;
 540}
 541
 542static int prepare_seq_file(struct file *file, struct bpf_iter_link *link,
 543                            const struct bpf_iter_seq_info *seq_info)
 544{
 545        struct bpf_iter_priv_data *priv_data;
 546        struct bpf_iter_target_info *tinfo;
 547        struct bpf_prog *prog;
 548        u32 total_priv_dsize;
 549        struct seq_file *seq;
 550        int err = 0;
 551
 552        mutex_lock(&link_mutex);
 553        prog = link->link.prog;
 554        bpf_prog_inc(prog);
 555        mutex_unlock(&link_mutex);
 556
 557        tinfo = link->tinfo;
 558        total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
 559                           seq_info->seq_priv_size;
 560        priv_data = __seq_open_private(file, seq_info->seq_ops,
 561                                       total_priv_dsize);
 562        if (!priv_data) {
 563                err = -ENOMEM;
 564                goto release_prog;
 565        }
 566
 567        if (seq_info->init_seq_private) {
 568                err = seq_info->init_seq_private(priv_data->target_private, &link->aux);
 569                if (err)
 570                        goto release_seq_file;
 571        }
 572
 573        init_seq_meta(priv_data, tinfo, seq_info, prog);
 574        seq = file->private_data;
 575        seq->private = priv_data->target_private;
 576
 577        return 0;
 578
 579release_seq_file:
 580        seq_release_private(file->f_inode, file);
 581        file->private_data = NULL;
 582release_prog:
 583        bpf_prog_put(prog);
 584        return err;
 585}
 586
 587int bpf_iter_new_fd(struct bpf_link *link)
 588{
 589        struct bpf_iter_link *iter_link;
 590        struct file *file;
 591        unsigned int flags;
 592        int err, fd;
 593
 594        if (link->ops != &bpf_iter_link_lops)
 595                return -EINVAL;
 596
 597        flags = O_RDONLY | O_CLOEXEC;
 598        fd = get_unused_fd_flags(flags);
 599        if (fd < 0)
 600                return fd;
 601
 602        file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
 603        if (IS_ERR(file)) {
 604                err = PTR_ERR(file);
 605                goto free_fd;
 606        }
 607
 608        iter_link = container_of(link, struct bpf_iter_link, link);
 609        err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link));
 610        if (err)
 611                goto free_file;
 612
 613        fd_install(fd, file);
 614        return fd;
 615
 616free_file:
 617        fput(file);
 618free_fd:
 619        put_unused_fd(fd);
 620        return err;
 621}
 622
 623struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
 624{
 625        struct bpf_iter_priv_data *iter_priv;
 626        struct seq_file *seq;
 627        void *seq_priv;
 628
 629        seq = meta->seq;
 630        if (seq->file->f_op != &bpf_iter_fops)
 631                return NULL;
 632
 633        seq_priv = seq->private;
 634        iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
 635                                 target_private);
 636
 637        if (in_stop && iter_priv->done_stop)
 638                return NULL;
 639
 640        meta->session_id = iter_priv->session_id;
 641        meta->seq_num = iter_priv->seq_num;
 642
 643        return iter_priv->prog;
 644}
 645
 646int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 647{
 648        int ret;
 649
 650        rcu_read_lock();
 651        migrate_disable();
 652        ret = BPF_PROG_RUN(prog, ctx);
 653        migrate_enable();
 654        rcu_read_unlock();
 655
 656        /* bpf program can only return 0 or 1:
 657         *  0 : okay
 658         *  1 : retry the same object
 659         * The bpf_iter_run_prog() return value
 660         * will be seq_ops->show() return value.
 661         */
 662        return ret == 0 ? 0 : -EAGAIN;
 663}
 664