linux/fs/btrfs/ref-verify.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2014 Facebook.  All rights reserved.
   4 */
   5
   6#include <linux/sched.h>
   7#include <linux/stacktrace.h>
   8#include "ctree.h"
   9#include "disk-io.h"
  10#include "locking.h"
  11#include "delayed-ref.h"
  12#include "ref-verify.h"
  13
  14/*
  15 * Used to keep track the roots and number of refs each root has for a given
  16 * bytenr.  This just tracks the number of direct references, no shared
  17 * references.
  18 */
  19struct root_entry {
  20        u64 root_objectid;
  21        u64 num_refs;
  22        struct rb_node node;
  23};
  24
  25/*
  26 * These are meant to represent what should exist in the extent tree, these can
  27 * be used to verify the extent tree is consistent as these should all match
  28 * what the extent tree says.
  29 */
  30struct ref_entry {
  31        u64 root_objectid;
  32        u64 parent;
  33        u64 owner;
  34        u64 offset;
  35        u64 num_refs;
  36        struct rb_node node;
  37};
  38
  39#define MAX_TRACE       16
  40
  41/*
  42 * Whenever we add/remove a reference we record the action.  The action maps
  43 * back to the delayed ref action.  We hold the ref we are changing in the
  44 * action so we can account for the history properly, and we record the root we
  45 * were called with since it could be different from ref_root.  We also store
  46 * stack traces because that's how I roll.
  47 */
  48struct ref_action {
  49        int action;
  50        u64 root;
  51        struct ref_entry ref;
  52        struct list_head list;
  53        unsigned long trace[MAX_TRACE];
  54        unsigned int trace_len;
  55};
  56
  57/*
  58 * One of these for every block we reference, it holds the roots and references
  59 * to it as well as all of the ref actions that have occurred to it.  We never
  60 * free it until we unmount the file system in order to make sure re-allocations
  61 * are happening properly.
  62 */
  63struct block_entry {
  64        u64 bytenr;
  65        u64 len;
  66        u64 num_refs;
  67        int metadata;
  68        int from_disk;
  69        struct rb_root roots;
  70        struct rb_root refs;
  71        struct rb_node node;
  72        struct list_head actions;
  73};
  74
  75static struct block_entry *insert_block_entry(struct rb_root *root,
  76                                              struct block_entry *be)
  77{
  78        struct rb_node **p = &root->rb_node;
  79        struct rb_node *parent_node = NULL;
  80        struct block_entry *entry;
  81
  82        while (*p) {
  83                parent_node = *p;
  84                entry = rb_entry(parent_node, struct block_entry, node);
  85                if (entry->bytenr > be->bytenr)
  86                        p = &(*p)->rb_left;
  87                else if (entry->bytenr < be->bytenr)
  88                        p = &(*p)->rb_right;
  89                else
  90                        return entry;
  91        }
  92
  93        rb_link_node(&be->node, parent_node, p);
  94        rb_insert_color(&be->node, root);
  95        return NULL;
  96}
  97
  98static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
  99{
 100        struct rb_node *n;
 101        struct block_entry *entry = NULL;
 102
 103        n = root->rb_node;
 104        while (n) {
 105                entry = rb_entry(n, struct block_entry, node);
 106                if (entry->bytenr < bytenr)
 107                        n = n->rb_right;
 108                else if (entry->bytenr > bytenr)
 109                        n = n->rb_left;
 110                else
 111                        return entry;
 112        }
 113        return NULL;
 114}
 115
 116static struct root_entry *insert_root_entry(struct rb_root *root,
 117                                            struct root_entry *re)
 118{
 119        struct rb_node **p = &root->rb_node;
 120        struct rb_node *parent_node = NULL;
 121        struct root_entry *entry;
 122
 123        while (*p) {
 124                parent_node = *p;
 125                entry = rb_entry(parent_node, struct root_entry, node);
 126                if (entry->root_objectid > re->root_objectid)
 127                        p = &(*p)->rb_left;
 128                else if (entry->root_objectid < re->root_objectid)
 129                        p = &(*p)->rb_right;
 130                else
 131                        return entry;
 132        }
 133
 134        rb_link_node(&re->node, parent_node, p);
 135        rb_insert_color(&re->node, root);
 136        return NULL;
 137
 138}
 139
 140static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
 141{
 142        if (ref1->root_objectid < ref2->root_objectid)
 143                return -1;
 144        if (ref1->root_objectid > ref2->root_objectid)
 145                return 1;
 146        if (ref1->parent < ref2->parent)
 147                return -1;
 148        if (ref1->parent > ref2->parent)
 149                return 1;
 150        if (ref1->owner < ref2->owner)
 151                return -1;
 152        if (ref1->owner > ref2->owner)
 153                return 1;
 154        if (ref1->offset < ref2->offset)
 155                return -1;
 156        if (ref1->offset > ref2->offset)
 157                return 1;
 158        return 0;
 159}
 160
 161static struct ref_entry *insert_ref_entry(struct rb_root *root,
 162                                          struct ref_entry *ref)
 163{
 164        struct rb_node **p = &root->rb_node;
 165        struct rb_node *parent_node = NULL;
 166        struct ref_entry *entry;
 167        int cmp;
 168
 169        while (*p) {
 170                parent_node = *p;
 171                entry = rb_entry(parent_node, struct ref_entry, node);
 172                cmp = comp_refs(entry, ref);
 173                if (cmp > 0)
 174                        p = &(*p)->rb_left;
 175                else if (cmp < 0)
 176                        p = &(*p)->rb_right;
 177                else
 178                        return entry;
 179        }
 180
 181        rb_link_node(&ref->node, parent_node, p);
 182        rb_insert_color(&ref->node, root);
 183        return NULL;
 184
 185}
 186
 187static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
 188{
 189        struct rb_node *n;
 190        struct root_entry *entry = NULL;
 191
 192        n = root->rb_node;
 193        while (n) {
 194                entry = rb_entry(n, struct root_entry, node);
 195                if (entry->root_objectid < objectid)
 196                        n = n->rb_right;
 197                else if (entry->root_objectid > objectid)
 198                        n = n->rb_left;
 199                else
 200                        return entry;
 201        }
 202        return NULL;
 203}
 204
 205#ifdef CONFIG_STACKTRACE
 206static void __save_stack_trace(struct ref_action *ra)
 207{
 208        ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2);
 209}
 210
 211static void __print_stack_trace(struct btrfs_fs_info *fs_info,
 212                                struct ref_action *ra)
 213{
 214        if (ra->trace_len == 0) {
 215                btrfs_err(fs_info, "  ref-verify: no stacktrace");
 216                return;
 217        }
 218        stack_trace_print(ra->trace, ra->trace_len, 2);
 219}
 220#else
 221static void inline __save_stack_trace(struct ref_action *ra)
 222{
 223}
 224
 225static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
 226                                       struct ref_action *ra)
 227{
 228        btrfs_err(fs_info, "  ref-verify: no stacktrace support");
 229}
 230#endif
 231
 232static void free_block_entry(struct block_entry *be)
 233{
 234        struct root_entry *re;
 235        struct ref_entry *ref;
 236        struct ref_action *ra;
 237        struct rb_node *n;
 238
 239        while ((n = rb_first(&be->roots))) {
 240                re = rb_entry(n, struct root_entry, node);
 241                rb_erase(&re->node, &be->roots);
 242                kfree(re);
 243        }
 244
 245        while((n = rb_first(&be->refs))) {
 246                ref = rb_entry(n, struct ref_entry, node);
 247                rb_erase(&ref->node, &be->refs);
 248                kfree(ref);
 249        }
 250
 251        while (!list_empty(&be->actions)) {
 252                ra = list_first_entry(&be->actions, struct ref_action,
 253                                      list);
 254                list_del(&ra->list);
 255                kfree(ra);
 256        }
 257        kfree(be);
 258}
 259
 260static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
 261                                           u64 bytenr, u64 len,
 262                                           u64 root_objectid)
 263{
 264        struct block_entry *be = NULL, *exist;
 265        struct root_entry *re = NULL;
 266
 267        re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
 268        be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
 269        if (!be || !re) {
 270                kfree(re);
 271                kfree(be);
 272                return ERR_PTR(-ENOMEM);
 273        }
 274        be->bytenr = bytenr;
 275        be->len = len;
 276
 277        re->root_objectid = root_objectid;
 278        re->num_refs = 0;
 279
 280        spin_lock(&fs_info->ref_verify_lock);
 281        exist = insert_block_entry(&fs_info->block_tree, be);
 282        if (exist) {
 283                if (root_objectid) {
 284                        struct root_entry *exist_re;
 285
 286                        exist_re = insert_root_entry(&exist->roots, re);
 287                        if (exist_re)
 288                                kfree(re);
 289                }
 290                kfree(be);
 291                return exist;
 292        }
 293
 294        be->num_refs = 0;
 295        be->metadata = 0;
 296        be->from_disk = 0;
 297        be->roots = RB_ROOT;
 298        be->refs = RB_ROOT;
 299        INIT_LIST_HEAD(&be->actions);
 300        if (root_objectid)
 301                insert_root_entry(&be->roots, re);
 302        else
 303                kfree(re);
 304        return be;
 305}
 306
 307static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
 308                          u64 parent, u64 bytenr, int level)
 309{
 310        struct block_entry *be;
 311        struct root_entry *re;
 312        struct ref_entry *ref = NULL, *exist;
 313
 314        ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
 315        if (!ref)
 316                return -ENOMEM;
 317
 318        if (parent)
 319                ref->root_objectid = 0;
 320        else
 321                ref->root_objectid = ref_root;
 322        ref->parent = parent;
 323        ref->owner = level;
 324        ref->offset = 0;
 325        ref->num_refs = 1;
 326
 327        be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
 328        if (IS_ERR(be)) {
 329                kfree(ref);
 330                return PTR_ERR(be);
 331        }
 332        be->num_refs++;
 333        be->from_disk = 1;
 334        be->metadata = 1;
 335
 336        if (!parent) {
 337                ASSERT(ref_root);
 338                re = lookup_root_entry(&be->roots, ref_root);
 339                ASSERT(re);
 340                re->num_refs++;
 341        }
 342        exist = insert_ref_entry(&be->refs, ref);
 343        if (exist) {
 344                exist->num_refs++;
 345                kfree(ref);
 346        }
 347        spin_unlock(&fs_info->ref_verify_lock);
 348
 349        return 0;
 350}
 351
 352static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
 353                               u64 parent, u32 num_refs, u64 bytenr,
 354                               u64 num_bytes)
 355{
 356        struct block_entry *be;
 357        struct ref_entry *ref;
 358
 359        ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
 360        if (!ref)
 361                return -ENOMEM;
 362        be = add_block_entry(fs_info, bytenr, num_bytes, 0);
 363        if (IS_ERR(be)) {
 364                kfree(ref);
 365                return PTR_ERR(be);
 366        }
 367        be->num_refs += num_refs;
 368
 369        ref->parent = parent;
 370        ref->num_refs = num_refs;
 371        if (insert_ref_entry(&be->refs, ref)) {
 372                spin_unlock(&fs_info->ref_verify_lock);
 373                btrfs_err(fs_info, "existing shared ref when reading from disk?");
 374                kfree(ref);
 375                return -EINVAL;
 376        }
 377        spin_unlock(&fs_info->ref_verify_lock);
 378        return 0;
 379}
 380
 381static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
 382                               struct extent_buffer *leaf,
 383                               struct btrfs_extent_data_ref *dref,
 384                               u64 bytenr, u64 num_bytes)
 385{
 386        struct block_entry *be;
 387        struct ref_entry *ref;
 388        struct root_entry *re;
 389        u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
 390        u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
 391        u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
 392        u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);
 393
 394        ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
 395        if (!ref)
 396                return -ENOMEM;
 397        be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
 398        if (IS_ERR(be)) {
 399                kfree(ref);
 400                return PTR_ERR(be);
 401        }
 402        be->num_refs += num_refs;
 403
 404        ref->parent = 0;
 405        ref->owner = owner;
 406        ref->root_objectid = ref_root;
 407        ref->offset = offset;
 408        ref->num_refs = num_refs;
 409        if (insert_ref_entry(&be->refs, ref)) {
 410                spin_unlock(&fs_info->ref_verify_lock);
 411                btrfs_err(fs_info, "existing ref when reading from disk?");
 412                kfree(ref);
 413                return -EINVAL;
 414        }
 415
 416        re = lookup_root_entry(&be->roots, ref_root);
 417        if (!re) {
 418                spin_unlock(&fs_info->ref_verify_lock);
 419                btrfs_err(fs_info, "missing root in new block entry?");
 420                return -EINVAL;
 421        }
 422        re->num_refs += num_refs;
 423        spin_unlock(&fs_info->ref_verify_lock);
 424        return 0;
 425}
 426
 427static int process_extent_item(struct btrfs_fs_info *fs_info,
 428                               struct btrfs_path *path, struct btrfs_key *key,
 429                               int slot, int *tree_block_level)
 430{
 431        struct btrfs_extent_item *ei;
 432        struct btrfs_extent_inline_ref *iref;
 433        struct btrfs_extent_data_ref *dref;
 434        struct btrfs_shared_data_ref *sref;
 435        struct extent_buffer *leaf = path->nodes[0];
 436        u32 item_size = btrfs_item_size_nr(leaf, slot);
 437        unsigned long end, ptr;
 438        u64 offset, flags, count;
 439        int type, ret;
 440
 441        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 442        flags = btrfs_extent_flags(leaf, ei);
 443
 444        if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
 445            flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 446                struct btrfs_tree_block_info *info;
 447
 448                info = (struct btrfs_tree_block_info *)(ei + 1);
 449                *tree_block_level = btrfs_tree_block_level(leaf, info);
 450                iref = (struct btrfs_extent_inline_ref *)(info + 1);
 451        } else {
 452                if (key->type == BTRFS_METADATA_ITEM_KEY)
 453                        *tree_block_level = key->offset;
 454                iref = (struct btrfs_extent_inline_ref *)(ei + 1);
 455        }
 456
 457        ptr = (unsigned long)iref;
 458        end = (unsigned long)ei + item_size;
 459        while (ptr < end) {
 460                iref = (struct btrfs_extent_inline_ref *)ptr;
 461                type = btrfs_extent_inline_ref_type(leaf, iref);
 462                offset = btrfs_extent_inline_ref_offset(leaf, iref);
 463                switch (type) {
 464                case BTRFS_TREE_BLOCK_REF_KEY:
 465                        ret = add_tree_block(fs_info, offset, 0, key->objectid,
 466                                             *tree_block_level);
 467                        break;
 468                case BTRFS_SHARED_BLOCK_REF_KEY:
 469                        ret = add_tree_block(fs_info, 0, offset, key->objectid,
 470                                             *tree_block_level);
 471                        break;
 472                case BTRFS_EXTENT_DATA_REF_KEY:
 473                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
 474                        ret = add_extent_data_ref(fs_info, leaf, dref,
 475                                                  key->objectid, key->offset);
 476                        break;
 477                case BTRFS_SHARED_DATA_REF_KEY:
 478                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
 479                        count = btrfs_shared_data_ref_count(leaf, sref);
 480                        ret = add_shared_data_ref(fs_info, offset, count,
 481                                                  key->objectid, key->offset);
 482                        break;
 483                default:
 484                        btrfs_err(fs_info, "invalid key type in iref");
 485                        ret = -EINVAL;
 486                        break;
 487                }
 488                if (ret)
 489                        break;
 490                ptr += btrfs_extent_inline_ref_size(type);
 491        }
 492        return ret;
 493}
 494
 495static int process_leaf(struct btrfs_root *root,
 496                        struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
 497{
 498        struct btrfs_fs_info *fs_info = root->fs_info;
 499        struct extent_buffer *leaf = path->nodes[0];
 500        struct btrfs_extent_data_ref *dref;
 501        struct btrfs_shared_data_ref *sref;
 502        u32 count;
 503        int i = 0, tree_block_level = 0, ret;
 504        struct btrfs_key key;
 505        int nritems = btrfs_header_nritems(leaf);
 506
 507        for (i = 0; i < nritems; i++) {
 508                btrfs_item_key_to_cpu(leaf, &key, i);
 509                switch (key.type) {
 510                case BTRFS_EXTENT_ITEM_KEY:
 511                        *num_bytes = key.offset;
 512                        /* fall through */
 513                case BTRFS_METADATA_ITEM_KEY:
 514                        *bytenr = key.objectid;
 515                        ret = process_extent_item(fs_info, path, &key, i,
 516                                                  &tree_block_level);
 517                        break;
 518                case BTRFS_TREE_BLOCK_REF_KEY:
 519                        ret = add_tree_block(fs_info, key.offset, 0,
 520                                             key.objectid, tree_block_level);
 521                        break;
 522                case BTRFS_SHARED_BLOCK_REF_KEY:
 523                        ret = add_tree_block(fs_info, 0, key.offset,
 524                                             key.objectid, tree_block_level);
 525                        break;
 526                case BTRFS_EXTENT_DATA_REF_KEY:
 527                        dref = btrfs_item_ptr(leaf, i,
 528                                              struct btrfs_extent_data_ref);
 529                        ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
 530                                                  *num_bytes);
 531                        break;
 532                case BTRFS_SHARED_DATA_REF_KEY:
 533                        sref = btrfs_item_ptr(leaf, i,
 534                                              struct btrfs_shared_data_ref);
 535                        count = btrfs_shared_data_ref_count(leaf, sref);
 536                        ret = add_shared_data_ref(fs_info, key.offset, count,
 537                                                  *bytenr, *num_bytes);
 538                        break;
 539                default:
 540                        break;
 541                }
 542                if (ret)
 543                        break;
 544        }
 545        return ret;
 546}
 547
 548/* Walk down to the leaf from the given level */
 549static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
 550                          int level, u64 *bytenr, u64 *num_bytes)
 551{
 552        struct btrfs_fs_info *fs_info = root->fs_info;
 553        struct extent_buffer *eb;
 554        u64 block_bytenr, gen;
 555        int ret = 0;
 556
 557        while (level >= 0) {
 558                if (level) {
 559                        struct btrfs_key first_key;
 560
 561                        block_bytenr = btrfs_node_blockptr(path->nodes[level],
 562                                                           path->slots[level]);
 563                        gen = btrfs_node_ptr_generation(path->nodes[level],
 564                                                        path->slots[level]);
 565                        btrfs_node_key_to_cpu(path->nodes[level], &first_key,
 566                                              path->slots[level]);
 567                        eb = read_tree_block(fs_info, block_bytenr, gen,
 568                                             level - 1, &first_key);
 569                        if (IS_ERR(eb))
 570                                return PTR_ERR(eb);
 571                        if (!extent_buffer_uptodate(eb)) {
 572                                free_extent_buffer(eb);
 573                                return -EIO;
 574                        }
 575                        btrfs_tree_read_lock(eb);
 576                        btrfs_set_lock_blocking_read(eb);
 577                        path->nodes[level-1] = eb;
 578                        path->slots[level-1] = 0;
 579                        path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
 580                } else {
 581                        ret = process_leaf(root, path, bytenr, num_bytes);
 582                        if (ret)
 583                                break;
 584                }
 585                level--;
 586        }
 587        return ret;
 588}
 589
 590/* Walk up to the next node that needs to be processed */
 591static int walk_up_tree(struct btrfs_path *path, int *level)
 592{
 593        int l;
 594
 595        for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
 596                if (!path->nodes[l])
 597                        continue;
 598                if (l) {
 599                        path->slots[l]++;
 600                        if (path->slots[l] <
 601                            btrfs_header_nritems(path->nodes[l])) {
 602                                *level = l;
 603                                return 0;
 604                        }
 605                }
 606                btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
 607                free_extent_buffer(path->nodes[l]);
 608                path->nodes[l] = NULL;
 609                path->slots[l] = 0;
 610                path->locks[l] = 0;
 611        }
 612
 613        return 1;
 614}
 615
 616static void dump_ref_action(struct btrfs_fs_info *fs_info,
 617                            struct ref_action *ra)
 618{
 619        btrfs_err(fs_info,
 620"  Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
 621                  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
 622                  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
 623        __print_stack_trace(fs_info, ra);
 624}
 625
 626/*
 627 * Dumps all the information from the block entry to printk, it's going to be
 628 * awesome.
 629 */
 630static void dump_block_entry(struct btrfs_fs_info *fs_info,
 631                             struct block_entry *be)
 632{
 633        struct ref_entry *ref;
 634        struct root_entry *re;
 635        struct ref_action *ra;
 636        struct rb_node *n;
 637
 638        btrfs_err(fs_info,
 639"dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
 640                  be->bytenr, be->len, be->num_refs, be->metadata,
 641                  be->from_disk);
 642
 643        for (n = rb_first(&be->refs); n; n = rb_next(n)) {
 644                ref = rb_entry(n, struct ref_entry, node);
 645                btrfs_err(fs_info,
 646"  ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
 647                          ref->root_objectid, ref->parent, ref->owner,
 648                          ref->offset, ref->num_refs);
 649        }
 650
 651        for (n = rb_first(&be->roots); n; n = rb_next(n)) {
 652                re = rb_entry(n, struct root_entry, node);
 653                btrfs_err(fs_info, "  root entry %llu, num_refs %llu",
 654                          re->root_objectid, re->num_refs);
 655        }
 656
 657        list_for_each_entry(ra, &be->actions, list)
 658                dump_ref_action(fs_info, ra);
 659}
 660
 661/*
 662 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 663 *
 664 * This will add an action item to the given bytenr and do sanity checks to make
 665 * sure we haven't messed something up.  If we are making a new allocation and
 666 * this block entry has history we will delete all previous actions as long as
 667 * our sanity checks pass as they are no longer needed.
 668 */
 669int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
 670                       struct btrfs_ref *generic_ref)
 671{
 672        struct ref_entry *ref = NULL, *exist;
 673        struct ref_action *ra = NULL;
 674        struct block_entry *be = NULL;
 675        struct root_entry *re = NULL;
 676        int action = generic_ref->action;
 677        int ret = 0;
 678        bool metadata;
 679        u64 bytenr = generic_ref->bytenr;
 680        u64 num_bytes = generic_ref->len;
 681        u64 parent = generic_ref->parent;
 682        u64 ref_root;
 683        u64 owner;
 684        u64 offset;
 685
 686        if (!btrfs_test_opt(fs_info, REF_VERIFY))
 687                return 0;
 688
 689        if (generic_ref->type == BTRFS_REF_METADATA) {
 690                ref_root = generic_ref->tree_ref.root;
 691                owner = generic_ref->tree_ref.level;
 692                offset = 0;
 693        } else {
 694                ref_root = generic_ref->data_ref.ref_root;
 695                owner = generic_ref->data_ref.ino;
 696                offset = generic_ref->data_ref.offset;
 697        }
 698        metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
 699
 700        ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
 701        ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
 702        if (!ra || !ref) {
 703                kfree(ref);
 704                kfree(ra);
 705                ret = -ENOMEM;
 706                goto out;
 707        }
 708
 709        if (parent) {
 710                ref->parent = parent;
 711        } else {
 712                ref->root_objectid = ref_root;
 713                ref->owner = owner;
 714                ref->offset = offset;
 715        }
 716        ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;
 717
 718        memcpy(&ra->ref, ref, sizeof(struct ref_entry));
 719        /*
 720         * Save the extra info from the delayed ref in the ref action to make it
 721         * easier to figure out what is happening.  The real ref's we add to the
 722         * ref tree need to reflect what we save on disk so it matches any
 723         * on-disk refs we pre-loaded.
 724         */
 725        ra->ref.owner = owner;
 726        ra->ref.offset = offset;
 727        ra->ref.root_objectid = ref_root;
 728        __save_stack_trace(ra);
 729
 730        INIT_LIST_HEAD(&ra->list);
 731        ra->action = action;
 732        ra->root = generic_ref->real_root;
 733
 734        /*
 735         * This is an allocation, preallocate the block_entry in case we haven't
 736         * used it before.
 737         */
 738        ret = -EINVAL;
 739        if (action == BTRFS_ADD_DELAYED_EXTENT) {
 740                /*
 741                 * For subvol_create we'll just pass in whatever the parent root
 742                 * is and the new root objectid, so let's not treat the passed
 743                 * in root as if it really has a ref for this bytenr.
 744                 */
 745                be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
 746                if (IS_ERR(be)) {
 747                        kfree(ra);
 748                        ret = PTR_ERR(be);
 749                        goto out;
 750                }
 751                be->num_refs++;
 752                if (metadata)
 753                        be->metadata = 1;
 754
 755                if (be->num_refs != 1) {
 756                        btrfs_err(fs_info,
 757                        "re-allocated a block that still has references to it!");
 758                        dump_block_entry(fs_info, be);
 759                        dump_ref_action(fs_info, ra);
 760                        goto out_unlock;
 761                }
 762
 763                while (!list_empty(&be->actions)) {
 764                        struct ref_action *tmp;
 765
 766                        tmp = list_first_entry(&be->actions, struct ref_action,
 767                                               list);
 768                        list_del(&tmp->list);
 769                        kfree(tmp);
 770                }
 771        } else {
 772                struct root_entry *tmp;
 773
 774                if (!parent) {
 775                        re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
 776                        if (!re) {
 777                                kfree(ref);
 778                                kfree(ra);
 779                                ret = -ENOMEM;
 780                                goto out;
 781                        }
 782                        /*
 783                         * This is the root that is modifying us, so it's the
 784                         * one we want to lookup below when we modify the
 785                         * re->num_refs.
 786                         */
 787                        ref_root = generic_ref->real_root;
 788                        re->root_objectid = generic_ref->real_root;
 789                        re->num_refs = 0;
 790                }
 791
 792                spin_lock(&fs_info->ref_verify_lock);
 793                be = lookup_block_entry(&fs_info->block_tree, bytenr);
 794                if (!be) {
 795                        btrfs_err(fs_info,
 796"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
 797                                  action, (unsigned long long)bytenr,
 798                                  (unsigned long long)num_bytes);
 799                        dump_ref_action(fs_info, ra);
 800                        kfree(ref);
 801                        kfree(ra);
 802                        goto out_unlock;
 803                }
 804
 805                if (!parent) {
 806                        tmp = insert_root_entry(&be->roots, re);
 807                        if (tmp) {
 808                                kfree(re);
 809                                re = tmp;
 810                        }
 811                }
 812        }
 813
 814        exist = insert_ref_entry(&be->refs, ref);
 815        if (exist) {
 816                if (action == BTRFS_DROP_DELAYED_REF) {
 817                        if (exist->num_refs == 0) {
 818                                btrfs_err(fs_info,
 819"dropping a ref for a existing root that doesn't have a ref on the block");
 820                                dump_block_entry(fs_info, be);
 821                                dump_ref_action(fs_info, ra);
 822                                kfree(ra);
 823                                goto out_unlock;
 824                        }
 825                        exist->num_refs--;
 826                        if (exist->num_refs == 0) {
 827                                rb_erase(&exist->node, &be->refs);
 828                                kfree(exist);
 829                        }
 830                } else if (!be->metadata) {
 831                        exist->num_refs++;
 832                } else {
 833                        btrfs_err(fs_info,
 834"attempting to add another ref for an existing ref on a tree block");
 835                        dump_block_entry(fs_info, be);
 836                        dump_ref_action(fs_info, ra);
 837                        kfree(ra);
 838                        goto out_unlock;
 839                }
 840                kfree(ref);
 841        } else {
 842                if (action == BTRFS_DROP_DELAYED_REF) {
 843                        btrfs_err(fs_info,
 844"dropping a ref for a root that doesn't have a ref on the block");
 845                        dump_block_entry(fs_info, be);
 846                        dump_ref_action(fs_info, ra);
 847                        kfree(ra);
 848                        goto out_unlock;
 849                }
 850        }
 851
 852        if (!parent && !re) {
 853                re = lookup_root_entry(&be->roots, ref_root);
 854                if (!re) {
 855                        /*
 856                         * This shouldn't happen because we will add our re
 857                         * above when we lookup the be with !parent, but just in
 858                         * case catch this case so we don't panic because I
 859                         * didn't think of some other corner case.
 860                         */
 861                        btrfs_err(fs_info, "failed to find root %llu for %llu",
 862                                  generic_ref->real_root, be->bytenr);
 863                        dump_block_entry(fs_info, be);
 864                        dump_ref_action(fs_info, ra);
 865                        kfree(ra);
 866                        goto out_unlock;
 867                }
 868        }
 869        if (action == BTRFS_DROP_DELAYED_REF) {
 870                if (re)
 871                        re->num_refs--;
 872                be->num_refs--;
 873        } else if (action == BTRFS_ADD_DELAYED_REF) {
 874                be->num_refs++;
 875                if (re)
 876                        re->num_refs++;
 877        }
 878        list_add_tail(&ra->list, &be->actions);
 879        ret = 0;
 880out_unlock:
 881        spin_unlock(&fs_info->ref_verify_lock);
 882out:
 883        if (ret)
 884                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
 885        return ret;
 886}
 887
 888/* Free up the ref cache */
 889void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
 890{
 891        struct block_entry *be;
 892        struct rb_node *n;
 893
 894        if (!btrfs_test_opt(fs_info, REF_VERIFY))
 895                return;
 896
 897        spin_lock(&fs_info->ref_verify_lock);
 898        while ((n = rb_first(&fs_info->block_tree))) {
 899                be = rb_entry(n, struct block_entry, node);
 900                rb_erase(&be->node, &fs_info->block_tree);
 901                free_block_entry(be);
 902                cond_resched_lock(&fs_info->ref_verify_lock);
 903        }
 904        spin_unlock(&fs_info->ref_verify_lock);
 905}
 906
 907void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
 908                               u64 len)
 909{
 910        struct block_entry *be = NULL, *entry;
 911        struct rb_node *n;
 912
 913        if (!btrfs_test_opt(fs_info, REF_VERIFY))
 914                return;
 915
 916        spin_lock(&fs_info->ref_verify_lock);
 917        n = fs_info->block_tree.rb_node;
 918        while (n) {
 919                entry = rb_entry(n, struct block_entry, node);
 920                if (entry->bytenr < start) {
 921                        n = n->rb_right;
 922                } else if (entry->bytenr > start) {
 923                        n = n->rb_left;
 924                } else {
 925                        be = entry;
 926                        break;
 927                }
 928                /* We want to get as close to start as possible */
 929                if (be == NULL ||
 930                    (entry->bytenr < start && be->bytenr > start) ||
 931                    (entry->bytenr < start && entry->bytenr > be->bytenr))
 932                        be = entry;
 933        }
 934
 935        /*
 936         * Could have an empty block group, maybe have something to check for
 937         * this case to verify we were actually empty?
 938         */
 939        if (!be) {
 940                spin_unlock(&fs_info->ref_verify_lock);
 941                return;
 942        }
 943
 944        n = &be->node;
 945        while (n) {
 946                be = rb_entry(n, struct block_entry, node);
 947                n = rb_next(n);
 948                if (be->bytenr < start && be->bytenr + be->len > start) {
 949                        btrfs_err(fs_info,
 950                                "block entry overlaps a block group [%llu,%llu]!",
 951                                start, len);
 952                        dump_block_entry(fs_info, be);
 953                        continue;
 954                }
 955                if (be->bytenr < start)
 956                        continue;
 957                if (be->bytenr >= start + len)
 958                        break;
 959                if (be->bytenr + be->len > start + len) {
 960                        btrfs_err(fs_info,
 961                                "block entry overlaps a block group [%llu,%llu]!",
 962                                start, len);
 963                        dump_block_entry(fs_info, be);
 964                }
 965                rb_erase(&be->node, &fs_info->block_tree);
 966                free_block_entry(be);
 967        }
 968        spin_unlock(&fs_info->ref_verify_lock);
 969}
 970
 971/* Walk down all roots and build the ref tree, meant to be called at mount */
 972int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
 973{
 974        struct btrfs_path *path;
 975        struct extent_buffer *eb;
 976        u64 bytenr = 0, num_bytes = 0;
 977        int ret, level;
 978
 979        if (!btrfs_test_opt(fs_info, REF_VERIFY))
 980                return 0;
 981
 982        path = btrfs_alloc_path();
 983        if (!path)
 984                return -ENOMEM;
 985
 986        eb = btrfs_read_lock_root_node(fs_info->extent_root);
 987        btrfs_set_lock_blocking_read(eb);
 988        level = btrfs_header_level(eb);
 989        path->nodes[level] = eb;
 990        path->slots[level] = 0;
 991        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
 992
 993        while (1) {
 994                /*
 995                 * We have to keep track of the bytenr/num_bytes we last hit
 996                 * because we could have run out of space for an inline ref, and
 997                 * would have had to added a ref key item which may appear on a
 998                 * different leaf from the original extent item.
 999                 */
1000                ret = walk_down_tree(fs_info->extent_root, path, level,
1001                                     &bytenr, &num_bytes);
1002                if (ret)
1003                        break;
1004                ret = walk_up_tree(path, &level);
1005                if (ret < 0)
1006                        break;
1007                if (ret > 0) {
1008                        ret = 0;
1009                        break;
1010                }
1011        }
1012        if (ret) {
1013                btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
1014                btrfs_free_ref_cache(fs_info);
1015        }
1016        btrfs_free_path(path);
1017        return ret;
1018}
1019