linux/fs/btrfs/ref-verify.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2014 Facebook.  All rights reserved.
   4 */
   5
   6#include <linux/sched.h>
   7#include <linux/stacktrace.h>
   8#include "ctree.h"
   9#include "disk-io.h"
  10#include "locking.h"
  11#include "delayed-ref.h"
  12#include "ref-verify.h"
  13
  14/*
  15 * Used to keep track the roots and number of refs each root has for a given
  16 * bytenr.  This just tracks the number of direct references, no shared
  17 * references.
  18 */
  19struct root_entry {
  20        u64 root_objectid;
  21        u64 num_refs;
  22        struct rb_node node;
  23};
  24
  25/*
  26 * These are meant to represent what should exist in the extent tree, these can
  27 * be used to verify the extent tree is consistent as these should all match
  28 * what the extent tree says.
  29 */
  30struct ref_entry {
  31        u64 root_objectid;
  32        u64 parent;
  33        u64 owner;
  34        u64 offset;
  35        u64 num_refs;
  36        struct rb_node node;
  37};
  38
  39#define MAX_TRACE       16
  40
  41/*
  42 * Whenever we add/remove a reference we record the action.  The action maps
  43 * back to the delayed ref action.  We hold the ref we are changing in the
  44 * action so we can account for the history properly, and we record the root we
  45 * were called with since it could be different from ref_root.  We also store
  46 * stack traces because thats how I roll.
  47 */
  48struct ref_action {
  49        int action;
  50        u64 root;
  51        struct ref_entry ref;
  52        struct list_head list;
  53        unsigned long trace[MAX_TRACE];
  54        unsigned int trace_len;
  55};
  56
  57/*
  58 * One of these for every block we reference, it holds the roots and references
  59 * to it as well as all of the ref actions that have occured to it.  We never
  60 * free it until we unmount the file system in order to make sure re-allocations
  61 * are happening properly.
  62 */
  63struct block_entry {
  64        u64 bytenr;
  65        u64 len;
  66        u64 num_refs;
  67        int metadata;
  68        int from_disk;
  69        struct rb_root roots;
  70        struct rb_root refs;
  71        struct rb_node node;
  72        struct list_head actions;
  73};
  74
  75static struct block_entry *insert_block_entry(struct rb_root *root,
  76                                              struct block_entry *be)
  77{
  78        struct rb_node **p = &root->rb_node;
  79        struct rb_node *parent_node = NULL;
  80        struct block_entry *entry;
  81
  82        while (*p) {
  83                parent_node = *p;
  84                entry = rb_entry(parent_node, struct block_entry, node);
  85                if (entry->bytenr > be->bytenr)
  86                        p = &(*p)->rb_left;
  87                else if (entry->bytenr < be->bytenr)
  88                        p = &(*p)->rb_right;
  89                else
  90                        return entry;
  91        }
  92
  93        rb_link_node(&be->node, parent_node, p);
  94        rb_insert_color(&be->node, root);
  95        return NULL;
  96}
  97
  98static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
  99{
 100        struct rb_node *n;
 101        struct block_entry *entry = NULL;
 102
 103        n = root->rb_node;
 104        while (n) {
 105                entry = rb_entry(n, struct block_entry, node);
 106                if (entry->bytenr < bytenr)
 107                        n = n->rb_right;
 108                else if (entry->bytenr > bytenr)
 109                        n = n->rb_left;
 110                else
 111                        return entry;
 112        }
 113        return NULL;
 114}
 115
 116static struct root_entry *insert_root_entry(struct rb_root *root,
 117                                            struct root_entry *re)
 118{
 119        struct rb_node **p = &root->rb_node;
 120        struct rb_node *parent_node = NULL;
 121        struct root_entry *entry;
 122
 123        while (*p) {
 124                parent_node = *p;
 125                entry = rb_entry(parent_node, struct root_entry, node);
 126                if (entry->root_objectid > re->root_objectid)
 127                        p = &(*p)->rb_left;
 128                else if (entry->root_objectid < re->root_objectid)
 129                        p = &(*p)->rb_right;
 130                else
 131                        return entry;
 132        }
 133
 134        rb_link_node(&re->node, parent_node, p);
 135        rb_insert_color(&re->node, root);
 136        return NULL;
 137
 138}
 139
 140static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
 141{
 142        if (ref1->root_objectid < ref2->root_objectid)
 143                return -1;
 144        if (ref1->root_objectid > ref2->root_objectid)
 145                return 1;
 146        if (ref1->parent < ref2->parent)
 147                return -1;
 148        if (ref1->parent > ref2->parent)
 149                return 1;
 150        if (ref1->owner < ref2->owner)
 151                return -1;
 152        if (ref1->owner > ref2->owner)
 153                return 1;
 154        if (ref1->offset < ref2->offset)
 155                return -1;
 156        if (ref1->offset > ref2->offset)
 157                return 1;
 158        return 0;
 159}
 160
 161static struct ref_entry *insert_ref_entry(struct rb_root *root,
 162                                          struct ref_entry *ref)
 163{
 164        struct rb_node **p = &root->rb_node;
 165        struct rb_node *parent_node = NULL;
 166        struct ref_entry *entry;
 167        int cmp;
 168
 169        while (*p) {
 170                parent_node = *p;
 171                entry = rb_entry(parent_node, struct ref_entry, node);
 172                cmp = comp_refs(entry, ref);
 173                if (cmp > 0)
 174                        p = &(*p)->rb_left;
 175                else if (cmp < 0)
 176                        p = &(*p)->rb_right;
 177                else
 178                        return entry;
 179        }
 180
 181        rb_link_node(&ref->node, parent_node, p);
 182        rb_insert_color(&ref->node, root);
 183        return NULL;
 184
 185}
 186
 187static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
 188{
 189        struct rb_node *n;
 190        struct root_entry *entry = NULL;
 191
 192        n = root->rb_node;
 193        while (n) {
 194                entry = rb_entry(n, struct root_entry, node);
 195                if (entry->root_objectid < objectid)
 196                        n = n->rb_right;
 197                else if (entry->root_objectid > objectid)
 198                        n = n->rb_left;
 199                else
 200                        return entry;
 201        }
 202        return NULL;
 203}
 204
 205#ifdef CONFIG_STACKTRACE
 206static void __save_stack_trace(struct ref_action *ra)
 207{
 208        struct stack_trace stack_trace;
 209
 210        stack_trace.max_entries = MAX_TRACE;
 211        stack_trace.nr_entries = 0;
 212        stack_trace.entries = ra->trace;
 213        stack_trace.skip = 2;
 214        save_stack_trace(&stack_trace);
 215        ra->trace_len = stack_trace.nr_entries;
 216}
 217
 218static void __print_stack_trace(struct btrfs_fs_info *fs_info,
 219                                struct ref_action *ra)
 220{
 221        struct stack_trace trace;
 222
 223        if (ra->trace_len == 0) {
 224                btrfs_err(fs_info, "  ref-verify: no stacktrace");
 225                return;
 226        }
 227        trace.nr_entries = ra->trace_len;
 228        trace.entries = ra->trace;
 229        print_stack_trace(&trace, 2);
 230}
 231#else
 232static void inline __save_stack_trace(struct ref_action *ra)
 233{
 234}
 235
 236static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
 237                                       struct ref_action *ra)
 238{
 239        btrfs_err(fs_info, "  ref-verify: no stacktrace support");
 240}
 241#endif
 242
 243static void free_block_entry(struct block_entry *be)
 244{
 245        struct root_entry *re;
 246        struct ref_entry *ref;
 247        struct ref_action *ra;
 248        struct rb_node *n;
 249
 250        while ((n = rb_first(&be->roots))) {
 251                re = rb_entry(n, struct root_entry, node);
 252                rb_erase(&re->node, &be->roots);
 253                kfree(re);
 254        }
 255
 256        while((n = rb_first(&be->refs))) {
 257                ref = rb_entry(n, struct ref_entry, node);
 258                rb_erase(&ref->node, &be->refs);
 259                kfree(ref);
 260        }
 261
 262        while (!list_empty(&be->actions)) {
 263                ra = list_first_entry(&be->actions, struct ref_action,
 264                                      list);
 265                list_del(&ra->list);
 266                kfree(ra);
 267        }
 268        kfree(be);
 269}
 270
 271static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
 272                                           u64 bytenr, u64 len,
 273                                           u64 root_objectid)
 274{
 275        struct block_entry *be = NULL, *exist;
 276        struct root_entry *re = NULL;
 277
 278        re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
 279        be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
 280        if (!be || !re) {
 281                kfree(re);
 282                kfree(be);
 283                return ERR_PTR(-ENOMEM);
 284        }
 285        be->bytenr = bytenr;
 286        be->len = len;
 287
 288        re->root_objectid = root_objectid;
 289        re->num_refs = 0;
 290
 291        spin_lock(&fs_info->ref_verify_lock);
 292        exist = insert_block_entry(&fs_info->block_tree, be);
 293        if (exist) {
 294                if (root_objectid) {
 295                        struct root_entry *exist_re;
 296
 297                        exist_re = insert_root_entry(&exist->roots, re);
 298                        if (exist_re)
 299                                kfree(re);
 300                }
 301                kfree(be);
 302                return exist;
 303        }
 304
 305        be->num_refs = 0;
 306        be->metadata = 0;
 307        be->from_disk = 0;
 308        be->roots = RB_ROOT;
 309        be->refs = RB_ROOT;
 310        INIT_LIST_HEAD(&be->actions);
 311        if (root_objectid)
 312                insert_root_entry(&be->roots, re);
 313        else
 314                kfree(re);
 315        return be;
 316}
 317
 318static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
 319                          u64 parent, u64 bytenr, int level)
 320{
 321        struct block_entry *be;
 322        struct root_entry *re;
 323        struct ref_entry *ref = NULL, *exist;
 324
 325        ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
 326        if (!ref)
 327                return -ENOMEM;
 328
 329        if (parent)
 330                ref->root_objectid = 0;
 331        else
 332                ref->root_objectid = ref_root;
 333        ref->parent = parent;
 334        ref->owner = level;
 335        ref->offset = 0;
 336        ref->num_refs = 1;
 337
 338        be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
 339        if (IS_ERR(be)) {
 340                kfree(ref);
 341                return PTR_ERR(be);
 342        }
 343        be->num_refs++;
 344        be->from_disk = 1;
 345        be->metadata = 1;
 346
 347        if (!parent) {
 348                ASSERT(ref_root);
 349                re = lookup_root_entry(&be->roots, ref_root);
 350                ASSERT(re);
 351                re->num_refs++;
 352        }
 353        exist = insert_ref_entry(&be->refs, ref);
 354        if (exist) {
 355                exist->num_refs++;
 356                kfree(ref);
 357        }
 358        spin_unlock(&fs_info->ref_verify_lock);
 359
 360        return 0;
 361}
 362
 363static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
 364                               u64 parent, u32 num_refs, u64 bytenr,
 365                               u64 num_bytes)
 366{
 367        struct block_entry *be;
 368        struct ref_entry *ref;
 369
 370        ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
 371        if (!ref)
 372                return -ENOMEM;
 373        be = add_block_entry(fs_info, bytenr, num_bytes, 0);
 374        if (IS_ERR(be)) {
 375                kfree(ref);
 376                return PTR_ERR(be);
 377        }
 378        be->num_refs += num_refs;
 379
 380        ref->parent = parent;
 381        ref->num_refs = num_refs;
 382        if (insert_ref_entry(&be->refs, ref)) {
 383                spin_unlock(&fs_info->ref_verify_lock);
 384                btrfs_err(fs_info, "existing shared ref when reading from disk?");
 385                kfree(ref);
 386                return -EINVAL;
 387        }
 388        spin_unlock(&fs_info->ref_verify_lock);
 389        return 0;
 390}
 391
 392static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
 393                               struct extent_buffer *leaf,
 394                               struct btrfs_extent_data_ref *dref,
 395                               u64 bytenr, u64 num_bytes)
 396{
 397        struct block_entry *be;
 398        struct ref_entry *ref;
 399        struct root_entry *re;
 400        u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
 401        u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
 402        u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
 403        u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
 404
 405        ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
 406        if (!ref)
 407                return -ENOMEM;
 408        be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
 409        if (IS_ERR(be)) {
 410                kfree(ref);
 411                return PTR_ERR(be);
 412        }
 413        be->num_refs += num_refs;
 414
 415        ref->parent = 0;
 416        ref->owner = owner;
 417        ref->root_objectid = ref_root;
 418        ref->offset = offset;
 419        ref->num_refs = num_refs;
 420        if (insert_ref_entry(&be->refs, ref)) {
 421                spin_unlock(&fs_info->ref_verify_lock);
 422                btrfs_err(fs_info, "existing ref when reading from disk?");
 423                kfree(ref);
 424                return -EINVAL;
 425        }
 426
 427        re = lookup_root_entry(&be->roots, ref_root);
 428        if (!re) {
 429                spin_unlock(&fs_info->ref_verify_lock);
 430                btrfs_err(fs_info, "missing root in new block entry?");
 431                return -EINVAL;
 432        }
 433        re->num_refs += num_refs;
 434        spin_unlock(&fs_info->ref_verify_lock);
 435        return 0;
 436}
 437
 438static int process_extent_item(struct btrfs_fs_info *fs_info,
 439                               struct btrfs_path *path, struct btrfs_key *key,
 440                               int slot, int *tree_block_level)
 441{
 442        struct btrfs_extent_item *ei;
 443        struct btrfs_extent_inline_ref *iref;
 444        struct btrfs_extent_data_ref *dref;
 445        struct btrfs_shared_data_ref *sref;
 446        struct extent_buffer *leaf = path->nodes[0];
 447        u32 item_size = btrfs_item_size_nr(leaf, slot);
 448        unsigned long end, ptr;
 449        u64 offset, flags, count;
 450        int type, ret;
 451
 452        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 453        flags = btrfs_extent_flags(leaf, ei);
 454
 455        if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
 456            flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 457                struct btrfs_tree_block_info *info;
 458
 459                info = (struct btrfs_tree_block_info *)(ei + 1);
 460                *tree_block_level = btrfs_tree_block_level(leaf, info);
 461                iref = (struct btrfs_extent_inline_ref *)(info + 1);
 462        } else {
 463                if (key->type == BTRFS_METADATA_ITEM_KEY)
 464                        *tree_block_level = key->offset;
 465                iref = (struct btrfs_extent_inline_ref *)(ei + 1);
 466        }
 467
 468        ptr = (unsigned long)iref;
 469        end = (unsigned long)ei + item_size;
 470        while (ptr < end) {
 471                iref = (struct btrfs_extent_inline_ref *)ptr;
 472                type = btrfs_extent_inline_ref_type(leaf, iref);
 473                offset = btrfs_extent_inline_ref_offset(leaf, iref);
 474                switch (type) {
 475                case BTRFS_TREE_BLOCK_REF_KEY:
 476                        ret = add_tree_block(fs_info, offset, 0, key->objectid,
 477                                             *tree_block_level);
 478                        break;
 479                case BTRFS_SHARED_BLOCK_REF_KEY:
 480                        ret = add_tree_block(fs_info, 0, offset, key->objectid,
 481                                             *tree_block_level);
 482                        break;
 483                case BTRFS_EXTENT_DATA_REF_KEY:
 484                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
 485                        ret = add_extent_data_ref(fs_info, leaf, dref,
 486                                                  key->objectid, key->offset);
 487                        break;
 488                case BTRFS_SHARED_DATA_REF_KEY:
 489                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
 490                        count = btrfs_shared_data_ref_count(leaf, sref);
 491                        ret = add_shared_data_ref(fs_info, offset, count,
 492                                                  key->objectid, key->offset);
 493                        break;
 494                default:
 495                        btrfs_err(fs_info, "invalid key type in iref");
 496                        ret = -EINVAL;
 497                        break;
 498                }
 499                if (ret)
 500                        break;
 501                ptr += btrfs_extent_inline_ref_size(type);
 502        }
 503        return ret;
 504}
 505
 506static int process_leaf(struct btrfs_root *root,
 507                        struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
 508{
 509        struct btrfs_fs_info *fs_info = root->fs_info;
 510        struct extent_buffer *leaf = path->nodes[0];
 511        struct btrfs_extent_data_ref *dref;
 512        struct btrfs_shared_data_ref *sref;
 513        u32 count;
 514        int i = 0, tree_block_level = 0, ret;
 515        struct btrfs_key key;
 516        int nritems = btrfs_header_nritems(leaf);
 517
 518        for (i = 0; i < nritems; i++) {
 519                btrfs_item_key_to_cpu(leaf, &key, i);
 520                switch (key.type) {
 521                case BTRFS_EXTENT_ITEM_KEY:
 522                        *num_bytes = key.offset;
 523                case BTRFS_METADATA_ITEM_KEY:
 524                        *bytenr = key.objectid;
 525                        ret = process_extent_item(fs_info, path, &key, i,
 526                                                  &tree_block_level);
 527                        break;
 528                case BTRFS_TREE_BLOCK_REF_KEY:
 529                        ret = add_tree_block(fs_info, key.offset, 0,
 530                                             key.objectid, tree_block_level);
 531                        break;
 532                case BTRFS_SHARED_BLOCK_REF_KEY:
 533                        ret = add_tree_block(fs_info, 0, key.offset,
 534                                             key.objectid, tree_block_level);
 535                        break;
 536                case BTRFS_EXTENT_DATA_REF_KEY:
 537                        dref = btrfs_item_ptr(leaf, i,
 538                                              struct btrfs_extent_data_ref);
 539                        ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
 540                                                  *num_bytes);
 541                        break;
 542                case BTRFS_SHARED_DATA_REF_KEY:
 543                        sref = btrfs_item_ptr(leaf, i,
 544                                              struct btrfs_shared_data_ref);
 545                        count = btrfs_shared_data_ref_count(leaf, sref);
 546                        ret = add_shared_data_ref(fs_info, key.offset, count,
 547                                                  *bytenr, *num_bytes);
 548                        break;
 549                default:
 550                        break;
 551                }
 552                if (ret)
 553                        break;
 554        }
 555        return ret;
 556}
 557
 558/* Walk down to the leaf from the given level */
 559static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
 560                          int level, u64 *bytenr, u64 *num_bytes)
 561{
 562        struct btrfs_fs_info *fs_info = root->fs_info;
 563        struct extent_buffer *eb;
 564        u64 block_bytenr, gen;
 565        int ret = 0;
 566
 567        while (level >= 0) {
 568                if (level) {
 569                        struct btrfs_key first_key;
 570
 571                        block_bytenr = btrfs_node_blockptr(path->nodes[level],
 572                                                           path->slots[level]);
 573                        gen = btrfs_node_ptr_generation(path->nodes[level],
 574                                                        path->slots[level]);
 575                        btrfs_node_key_to_cpu(path->nodes[level], &first_key,
 576                                              path->slots[level]);
 577                        eb = read_tree_block(fs_info, block_bytenr, gen,
 578                                             level - 1, &first_key);
 579                        if (IS_ERR(eb))
 580                                return PTR_ERR(eb);
 581                        if (!extent_buffer_uptodate(eb)) {
 582                                free_extent_buffer(eb);
 583                                return -EIO;
 584                        }
 585                        btrfs_tree_read_lock(eb);
 586                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 587                        path->nodes[level-1] = eb;
 588                        path->slots[level-1] = 0;
 589                        path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
 590                } else {
 591                        ret = process_leaf(root, path, bytenr, num_bytes);
 592                        if (ret)
 593                                break;
 594                }
 595                level--;
 596        }
 597        return ret;
 598}
 599
 600/* Walk up to the next node that needs to be processed */
 601static int walk_up_tree(struct btrfs_path *path, int *level)
 602{
 603        int l;
 604
 605        for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
 606                if (!path->nodes[l])
 607                        continue;
 608                if (l) {
 609                        path->slots[l]++;
 610                        if (path->slots[l] <
 611                            btrfs_header_nritems(path->nodes[l])) {
 612                                *level = l;
 613                                return 0;
 614                        }
 615                }
 616                btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
 617                free_extent_buffer(path->nodes[l]);
 618                path->nodes[l] = NULL;
 619                path->slots[l] = 0;
 620                path->locks[l] = 0;
 621        }
 622
 623        return 1;
 624}
 625
 626static void dump_ref_action(struct btrfs_fs_info *fs_info,
 627                            struct ref_action *ra)
 628{
 629        btrfs_err(fs_info,
 630"  Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
 631                  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
 632                  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
 633        __print_stack_trace(fs_info, ra);
 634}
 635
 636/*
 637 * Dumps all the information from the block entry to printk, it's going to be
 638 * awesome.
 639 */
 640static void dump_block_entry(struct btrfs_fs_info *fs_info,
 641                             struct block_entry *be)
 642{
 643        struct ref_entry *ref;
 644        struct root_entry *re;
 645        struct ref_action *ra;
 646        struct rb_node *n;
 647
 648        btrfs_err(fs_info,
 649"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
 650                  be->bytenr, be->len, be->num_refs, be->metadata,
 651                  be->from_disk);
 652
 653        for (n = rb_first(&be->refs); n; n = rb_next(n)) {
 654                ref = rb_entry(n, struct ref_entry, node);
 655                btrfs_err(fs_info,
 656"  ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
 657                          ref->root_objectid, ref->parent, ref->owner,
 658                          ref->offset, ref->num_refs);
 659        }
 660
 661        for (n = rb_first(&be->roots); n; n = rb_next(n)) {
 662                re = rb_entry(n, struct root_entry, node);
 663                btrfs_err(fs_info, "  root entry %llu, num_refs %llu",
 664                          re->root_objectid, re->num_refs);
 665        }
 666
 667        list_for_each_entry(ra, &be->actions, list)
 668                dump_ref_action(fs_info, ra);
 669}
 670
 671/*
 672 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 673 * @root: the root we are making this modification from.
 674 * @bytenr: the bytenr we are modifying.
 675 * @num_bytes: number of bytes.
 676 * @parent: the parent bytenr.
 677 * @ref_root: the original root owner of the bytenr.
 678 * @owner: level in the case of metadata, inode in the case of data.
 679 * @offset: 0 for metadata, file offset for data.
 680 * @action: the action that we are doing, this is the same as the delayed ref
 681 *      action.
 682 *
 683 * This will add an action item to the given bytenr and do sanity checks to make
 684 * sure we haven't messed something up.  If we are making a new allocation and
 685 * this block entry has history we will delete all previous actions as long as
 686 * our sanity checks pass as they are no longer needed.
 687 */
 688int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
 689                       u64 parent, u64 ref_root, u64 owner, u64 offset,
 690                       int action)
 691{
 692        struct btrfs_fs_info *fs_info = root->fs_info;
 693        struct ref_entry *ref = NULL, *exist;
 694        struct ref_action *ra = NULL;
 695        struct block_entry *be = NULL;
 696        struct root_entry *re = NULL;
 697        int ret = 0;
 698        bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
 699
 700        if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
 701                return 0;
 702
 703        ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
 704        ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
 705        if (!ra || !ref) {
 706                kfree(ref);
 707                kfree(ra);
 708                ret = -ENOMEM;
 709                goto out;
 710        }
 711
 712        if (parent) {
 713                ref->parent = parent;
 714        } else {
 715                ref->root_objectid = ref_root;
 716                ref->owner = owner;
 717                ref->offset = offset;
 718        }
 719        ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;
 720
 721        memcpy(&ra->ref, ref, sizeof(struct ref_entry));
 722        /*
 723         * Save the extra info from the delayed ref in the ref action to make it
 724         * easier to figure out what is happening.  The real ref's we add to the
 725         * ref tree need to reflect what we save on disk so it matches any
 726         * on-disk refs we pre-loaded.
 727         */
 728        ra->ref.owner = owner;
 729        ra->ref.offset = offset;
 730        ra->ref.root_objectid = ref_root;
 731        __save_stack_trace(ra);
 732
 733        INIT_LIST_HEAD(&ra->list);
 734        ra->action = action;
 735        ra->root = root->objectid;
 736
 737        /*
 738         * This is an allocation, preallocate the block_entry in case we haven't
 739         * used it before.
 740         */
 741        ret = -EINVAL;
 742        if (action == BTRFS_ADD_DELAYED_EXTENT) {
 743                /*
 744                 * For subvol_create we'll just pass in whatever the parent root
 745                 * is and the new root objectid, so let's not treat the passed
 746                 * in root as if it really has a ref for this bytenr.
 747                 */
 748                be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
 749                if (IS_ERR(be)) {
 750                        kfree(ra);
 751                        ret = PTR_ERR(be);
 752                        goto out;
 753                }
 754                be->num_refs++;
 755                if (metadata)
 756                        be->metadata = 1;
 757
 758                if (be->num_refs != 1) {
 759                        btrfs_err(fs_info,
 760                        "re-allocated a block that still has references to it!");
 761                        dump_block_entry(fs_info, be);
 762                        dump_ref_action(fs_info, ra);
 763                        goto out_unlock;
 764                }
 765
 766                while (!list_empty(&be->actions)) {
 767                        struct ref_action *tmp;
 768
 769                        tmp = list_first_entry(&be->actions, struct ref_action,
 770                                               list);
 771                        list_del(&tmp->list);
 772                        kfree(tmp);
 773                }
 774        } else {
 775                struct root_entry *tmp;
 776
 777                if (!parent) {
 778                        re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
 779                        if (!re) {
 780                                kfree(ref);
 781                                kfree(ra);
 782                                ret = -ENOMEM;
 783                                goto out;
 784                        }
 785                        /*
 786                         * This is the root that is modifying us, so it's the
 787                         * one we want to lookup below when we modify the
 788                         * re->num_refs.
 789                         */
 790                        ref_root = root->objectid;
 791                        re->root_objectid = root->objectid;
 792                        re->num_refs = 0;
 793                }
 794
 795                spin_lock(&root->fs_info->ref_verify_lock);
 796                be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
 797                if (!be) {
 798                        btrfs_err(fs_info,
 799"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
 800                                  action, (unsigned long long)bytenr,
 801                                  (unsigned long long)num_bytes);
 802                        dump_ref_action(fs_info, ra);
 803                        kfree(ref);
 804                        kfree(ra);
 805                        goto out_unlock;
 806                }
 807
 808                if (!parent) {
 809                        tmp = insert_root_entry(&be->roots, re);
 810                        if (tmp) {
 811                                kfree(re);
 812                                re = tmp;
 813                        }
 814                }
 815        }
 816
 817        exist = insert_ref_entry(&be->refs, ref);
 818        if (exist) {
 819                if (action == BTRFS_DROP_DELAYED_REF) {
 820                        if (exist->num_refs == 0) {
 821                                btrfs_err(fs_info,
 822"dropping a ref for a existing root that doesn't have a ref on the block");
 823                                dump_block_entry(fs_info, be);
 824                                dump_ref_action(fs_info, ra);
 825                                kfree(ra);
 826                                goto out_unlock;
 827                        }
 828                        exist->num_refs--;
 829                        if (exist->num_refs == 0) {
 830                                rb_erase(&exist->node, &be->refs);
 831                                kfree(exist);
 832                        }
 833                } else if (!be->metadata) {
 834                        exist->num_refs++;
 835                } else {
 836                        btrfs_err(fs_info,
 837"attempting to add another ref for an existing ref on a tree block");
 838                        dump_block_entry(fs_info, be);
 839                        dump_ref_action(fs_info, ra);
 840                        kfree(ra);
 841                        goto out_unlock;
 842                }
 843                kfree(ref);
 844        } else {
 845                if (action == BTRFS_DROP_DELAYED_REF) {
 846                        btrfs_err(fs_info,
 847"dropping a ref for a root that doesn't have a ref on the block");
 848                        dump_block_entry(fs_info, be);
 849                        dump_ref_action(fs_info, ra);
 850                        kfree(ra);
 851                        goto out_unlock;
 852                }
 853        }
 854
 855        if (!parent && !re) {
 856                re = lookup_root_entry(&be->roots, ref_root);
 857                if (!re) {
 858                        /*
 859                         * This shouldn't happen because we will add our re
 860                         * above when we lookup the be with !parent, but just in
 861                         * case catch this case so we don't panic because I
 862                         * didn't thik of some other corner case.
 863                         */
 864                        btrfs_err(fs_info, "failed to find root %llu for %llu",
 865                                  root->objectid, be->bytenr);
 866                        dump_block_entry(fs_info, be);
 867                        dump_ref_action(fs_info, ra);
 868                        kfree(ra);
 869                        goto out_unlock;
 870                }
 871        }
 872        if (action == BTRFS_DROP_DELAYED_REF) {
 873                if (re)
 874                        re->num_refs--;
 875                be->num_refs--;
 876        } else if (action == BTRFS_ADD_DELAYED_REF) {
 877                be->num_refs++;
 878                if (re)
 879                        re->num_refs++;
 880        }
 881        list_add_tail(&ra->list, &be->actions);
 882        ret = 0;
 883out_unlock:
 884        spin_unlock(&root->fs_info->ref_verify_lock);
 885out:
 886        if (ret)
 887                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
 888        return ret;
 889}
 890
 891/* Free up the ref cache */
 892void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
 893{
 894        struct block_entry *be;
 895        struct rb_node *n;
 896
 897        if (!btrfs_test_opt(fs_info, REF_VERIFY))
 898                return;
 899
 900        spin_lock(&fs_info->ref_verify_lock);
 901        while ((n = rb_first(&fs_info->block_tree))) {
 902                be = rb_entry(n, struct block_entry, node);
 903                rb_erase(&be->node, &fs_info->block_tree);
 904                free_block_entry(be);
 905                cond_resched_lock(&fs_info->ref_verify_lock);
 906        }
 907        spin_unlock(&fs_info->ref_verify_lock);
 908}
 909
 910void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
 911                               u64 len)
 912{
 913        struct block_entry *be = NULL, *entry;
 914        struct rb_node *n;
 915
 916        if (!btrfs_test_opt(fs_info, REF_VERIFY))
 917                return;
 918
 919        spin_lock(&fs_info->ref_verify_lock);
 920        n = fs_info->block_tree.rb_node;
 921        while (n) {
 922                entry = rb_entry(n, struct block_entry, node);
 923                if (entry->bytenr < start) {
 924                        n = n->rb_right;
 925                } else if (entry->bytenr > start) {
 926                        n = n->rb_left;
 927                } else {
 928                        be = entry;
 929                        break;
 930                }
 931                /* We want to get as close to start as possible */
 932                if (be == NULL ||
 933                    (entry->bytenr < start && be->bytenr > start) ||
 934                    (entry->bytenr < start && entry->bytenr > be->bytenr))
 935                        be = entry;
 936        }
 937
 938        /*
 939         * Could have an empty block group, maybe have something to check for
 940         * this case to verify we were actually empty?
 941         */
 942        if (!be) {
 943                spin_unlock(&fs_info->ref_verify_lock);
 944                return;
 945        }
 946
 947        n = &be->node;
 948        while (n) {
 949                be = rb_entry(n, struct block_entry, node);
 950                n = rb_next(n);
 951                if (be->bytenr < start && be->bytenr + be->len > start) {
 952                        btrfs_err(fs_info,
 953                                "block entry overlaps a block group [%llu,%llu]!",
 954                                start, len);
 955                        dump_block_entry(fs_info, be);
 956                        continue;
 957                }
 958                if (be->bytenr < start)
 959                        continue;
 960                if (be->bytenr >= start + len)
 961                        break;
 962                if (be->bytenr + be->len > start + len) {
 963                        btrfs_err(fs_info,
 964                                "block entry overlaps a block group [%llu,%llu]!",
 965                                start, len);
 966                        dump_block_entry(fs_info, be);
 967                }
 968                rb_erase(&be->node, &fs_info->block_tree);
 969                free_block_entry(be);
 970        }
 971        spin_unlock(&fs_info->ref_verify_lock);
 972}
 973
 974/* Walk down all roots and build the ref tree, meant to be called at mount */
 975int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
 976{
 977        struct btrfs_path *path;
 978        struct extent_buffer *eb;
 979        u64 bytenr = 0, num_bytes = 0;
 980        int ret, level;
 981
 982        if (!btrfs_test_opt(fs_info, REF_VERIFY))
 983                return 0;
 984
 985        path = btrfs_alloc_path();
 986        if (!path)
 987                return -ENOMEM;
 988
 989        eb = btrfs_read_lock_root_node(fs_info->extent_root);
 990        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 991        level = btrfs_header_level(eb);
 992        path->nodes[level] = eb;
 993        path->slots[level] = 0;
 994        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
 995
 996        while (1) {
 997                /*
 998                 * We have to keep track of the bytenr/num_bytes we last hit
 999                 * because we could have run out of space for an inline ref, and
1000                 * would have had to added a ref key item which may appear on a
1001                 * different leaf from the original extent item.
1002                 */
1003                ret = walk_down_tree(fs_info->extent_root, path, level,
1004                                     &bytenr, &num_bytes);
1005                if (ret)
1006                        break;
1007                ret = walk_up_tree(path, &level);
1008                if (ret < 0)
1009                        break;
1010                if (ret > 0) {
1011                        ret = 0;
1012                        break;
1013                }
1014        }
1015        if (ret) {
1016                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
1017                btrfs_free_ref_cache(fs_info);
1018        }
1019        btrfs_free_path(path);
1020        return ret;
1021}
1022