uboot/fs/btrfs/disk-io.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2#include <common.h>
   3#include <fs_internal.h>
   4#include <uuid.h>
   5#include <memalign.h>
   6#include "kernel-shared/btrfs_tree.h"
   7#include "common/rbtree-utils.h"
   8#include "disk-io.h"
   9#include "ctree.h"
  10#include "btrfs.h"
  11#include "volumes.h"
  12#include "extent-io.h"
  13#include "crypto/hash.h"
  14
  15/* specified errno for check_tree_block */
  16#define BTRFS_BAD_BYTENR                (-1)
  17#define BTRFS_BAD_FSID                  (-2)
  18#define BTRFS_BAD_LEVEL                 (-3)
  19#define BTRFS_BAD_NRITEMS               (-4)
  20
  21/* Calculate max possible nritems for a leaf/node */
  22static u32 max_nritems(u8 level, u32 nodesize)
  23{
  24
  25        if (level == 0)
  26                return ((nodesize - sizeof(struct btrfs_header)) /
  27                        sizeof(struct btrfs_item));
  28        return ((nodesize - sizeof(struct btrfs_header)) /
  29                sizeof(struct btrfs_key_ptr));
  30}
  31
  32static int check_tree_block(struct btrfs_fs_info *fs_info,
  33                            struct extent_buffer *buf)
  34{
  35
  36        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
  37        u32 nodesize = fs_info->nodesize;
  38        bool fsid_match = false;
  39        int ret = BTRFS_BAD_FSID;
  40
  41        if (buf->start != btrfs_header_bytenr(buf))
  42                return BTRFS_BAD_BYTENR;
  43        if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
  44                return BTRFS_BAD_LEVEL;
  45        if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
  46                                                    nodesize))
  47                return BTRFS_BAD_NRITEMS;
  48
  49        /* Only leaf can be empty */
  50        if (btrfs_header_nritems(buf) == 0 &&
  51            btrfs_header_level(buf) != 0)
  52                return BTRFS_BAD_NRITEMS;
  53
  54        while (fs_devices) {
  55                /*
  56                 * Checking the incompat flag is only valid for the current
  57                 * fs. For seed devices it's forbidden to have their uuid
  58                 * changed so reading ->fsid in this case is fine
  59                 */
  60                if (fs_devices == fs_info->fs_devices &&
  61                    btrfs_fs_incompat(fs_info, METADATA_UUID))
  62                        fsid_match = !memcmp_extent_buffer(buf,
  63                                                   fs_devices->metadata_uuid,
  64                                                   btrfs_header_fsid(),
  65                                                   BTRFS_FSID_SIZE);
  66                else
  67                        fsid_match = !memcmp_extent_buffer(buf,
  68                                                    fs_devices->fsid,
  69                                                    btrfs_header_fsid(),
  70                                                    BTRFS_FSID_SIZE);
  71
  72
  73                if (fsid_match) {
  74                        ret = 0;
  75                        break;
  76                }
  77                fs_devices = fs_devices->seed;
  78        }
  79        return ret;
  80}
  81
  82static void print_tree_block_error(struct btrfs_fs_info *fs_info,
  83                                struct extent_buffer *eb,
  84                                int err)
  85{
  86        char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
  87        char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
  88        u8 buf[BTRFS_UUID_SIZE];
  89
  90        if (!err)
  91                return;
  92
  93        fprintf(stderr, "bad tree block %llu, ", eb->start);
  94        switch (err) {
  95        case BTRFS_BAD_FSID:
  96                read_extent_buffer(eb, buf, btrfs_header_fsid(),
  97                                   BTRFS_UUID_SIZE);
  98                uuid_unparse(buf, found_uuid);
  99                uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
 100                fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
 101                        fs_uuid, found_uuid);
 102                break;
 103        case BTRFS_BAD_BYTENR:
 104                fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
 105                        eb->start, btrfs_header_bytenr(eb));
 106                break;
 107        case BTRFS_BAD_LEVEL:
 108                fprintf(stderr, "bad level, %u > %d\n",
 109                        btrfs_header_level(eb), BTRFS_MAX_LEVEL);
 110                break;
 111        case BTRFS_BAD_NRITEMS:
 112                fprintf(stderr, "invalid nr_items: %u\n",
 113                        btrfs_header_nritems(eb));
 114                break;
 115        }
 116}
 117
 118int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
 119{
 120        memset(out, 0, BTRFS_CSUM_SIZE);
 121
 122        switch (csum_type) {
 123        case BTRFS_CSUM_TYPE_CRC32:
 124                return hash_crc32c(data, len, out);
 125        case BTRFS_CSUM_TYPE_XXHASH:
 126                return hash_xxhash(data, len, out);
 127        case BTRFS_CSUM_TYPE_SHA256:
 128                return hash_sha256(data, len, out);
 129        default:
 130                printf("Unknown csum type %d\n", csum_type);
 131                return -EINVAL;
 132        }
 133}
 134
 135/*
 136 * Check if the super is valid:
 137 * - nodesize/sectorsize - minimum, maximum, alignment
 138 * - tree block starts   - alignment
 139 * - number of devices   - something sane
 140 * - sys array size      - maximum
 141 */
 142static int btrfs_check_super(struct btrfs_super_block *sb)
 143{
 144        u8 result[BTRFS_CSUM_SIZE];
 145        u16 csum_type;
 146        int csum_size;
 147        u8 *metadata_uuid;
 148
 149        if (btrfs_super_magic(sb) != BTRFS_MAGIC)
 150                return -EIO;
 151
 152        csum_type = btrfs_super_csum_type(sb);
 153        if (csum_type >= btrfs_super_num_csums()) {
 154                error("unsupported checksum algorithm %u", csum_type);
 155                return -EIO;
 156        }
 157        csum_size = btrfs_super_csum_size(sb);
 158
 159        btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
 160                        result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
 161
 162        if (memcmp(result, sb->csum, csum_size)) {
 163                error("superblock checksum mismatch");
 164                return -EIO;
 165        }
 166        if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
 167                error("tree_root level too big: %d >= %d",
 168                        btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
 169                goto error_out;
 170        }
 171        if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
 172                error("chunk_root level too big: %d >= %d",
 173                        btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
 174                goto error_out;
 175        }
 176        if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
 177                error("log_root level too big: %d >= %d",
 178                        btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
 179                goto error_out;
 180        }
 181
 182        if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
 183                error("tree_root block unaligned: %llu", btrfs_super_root(sb));
 184                goto error_out;
 185        }
 186        if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
 187                error("chunk_root block unaligned: %llu",
 188                        btrfs_super_chunk_root(sb));
 189                goto error_out;
 190        }
 191        if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
 192                error("log_root block unaligned: %llu",
 193                        btrfs_super_log_root(sb));
 194                goto error_out;
 195        }
 196        if (btrfs_super_nodesize(sb) < 4096) {
 197                error("nodesize too small: %u < 4096",
 198                        btrfs_super_nodesize(sb));
 199                goto error_out;
 200        }
 201        if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
 202                error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
 203                goto error_out;
 204        }
 205        if (btrfs_super_sectorsize(sb) < 4096) {
 206                error("sectorsize too small: %u < 4096",
 207                        btrfs_super_sectorsize(sb));
 208                goto error_out;
 209        }
 210        if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
 211                error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
 212                goto error_out;
 213        }
 214        if (btrfs_super_total_bytes(sb) == 0) {
 215                error("invalid total_bytes 0");
 216                goto error_out;
 217        }
 218        if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
 219                error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
 220                goto error_out;
 221        }
 222        if ((btrfs_super_stripesize(sb) != 4096)
 223                && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
 224                error("invalid stripesize %u", btrfs_super_stripesize(sb));
 225                goto error_out;
 226        }
 227
 228        if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
 229                metadata_uuid = sb->metadata_uuid;
 230        else
 231                metadata_uuid = sb->fsid;
 232
 233        if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
 234                char fsid[BTRFS_UUID_UNPARSED_SIZE];
 235                char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
 236
 237                uuid_unparse(sb->metadata_uuid, fsid);
 238                uuid_unparse(sb->dev_item.fsid, dev_fsid);
 239                error("dev_item UUID does not match fsid: %s != %s",
 240                        dev_fsid, fsid);
 241                goto error_out;
 242        }
 243
 244        /*
 245         * Hint to catch really bogus numbers, bitflips or so
 246         */
 247        if (btrfs_super_num_devices(sb) > (1UL << 31)) {
 248                error("suspicious number of devices: %llu",
 249                        btrfs_super_num_devices(sb));
 250        }
 251
 252        if (btrfs_super_num_devices(sb) == 0) {
 253                error("number of devices is 0");
 254                goto error_out;
 255        }
 256
 257        /*
 258         * Obvious sys_chunk_array corruptions, it must hold at least one key
 259         * and one chunk
 260         */
 261        if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
 262                error("system chunk array too big %u > %u",
 263                      btrfs_super_sys_array_size(sb),
 264                      BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
 265                goto error_out;
 266        }
 267        if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
 268                        + sizeof(struct btrfs_chunk)) {
 269                error("system chunk array too small %u < %zu",
 270                      btrfs_super_sys_array_size(sb),
 271                      sizeof(struct btrfs_disk_key) +
 272                      sizeof(struct btrfs_chunk));
 273                goto error_out;
 274        }
 275
 276        return 0;
 277
 278error_out:
 279        error("superblock checksum matches but it has invalid members");
 280        return -EIO;
 281}
 282
 283/*
 284 * btrfs_read_dev_super - read a valid primary superblock from a block device
 285 * @desc,@part: file descriptor of the device
 286 * @sb:         buffer where the superblock is going to be read in
 287 *
 288 * Unlike the btrfs-progs/kernel version, here we ony care about the first
 289 * super block, thus it's much simpler.
 290 */
 291int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
 292                         struct btrfs_super_block *sb)
 293{
 294        ALLOC_CACHE_ALIGN_BUFFER(char, tmp, BTRFS_SUPER_INFO_SIZE);
 295        struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
 296        int ret;
 297
 298        ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
 299                              BTRFS_SUPER_INFO_OFFSET);
 300        if (ret < BTRFS_SUPER_INFO_SIZE)
 301                return -EIO;
 302
 303        if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
 304                return -EIO;
 305
 306        if (btrfs_check_super(buf))
 307                return -EIO;
 308
 309        memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
 310        return 0;
 311}
 312
 313static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
 314                                  int verify, int silent, u16 csum_type)
 315{
 316        u8 result[BTRFS_CSUM_SIZE];
 317        u32 len;
 318
 319        len = buf->len - BTRFS_CSUM_SIZE;
 320        btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
 321                        result, len);
 322
 323        if (verify) {
 324                if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
 325                        /* FIXME: format */
 326                        if (!silent)
 327                                printk("checksum verify failed on %llu found %08X wanted %08X\n",
 328                                       (unsigned long long)buf->start,
 329                                       result[0],
 330                                       buf->data[0]);
 331                        return 1;
 332                }
 333        } else {
 334                write_extent_buffer(buf, result, 0, csum_size);
 335        }
 336        return 0;
 337}
 338
 339int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
 340                         u16 csum_type)
 341{
 342        return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
 343}
 344
 345static int csum_tree_block(struct btrfs_fs_info *fs_info,
 346                           struct extent_buffer *buf, int verify)
 347{
 348        u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 349        u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
 350
 351        return csum_tree_block_size(buf, csum_size, verify, csum_type);
 352}
 353
 354struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
 355                                            u64 bytenr, u32 blocksize)
 356{
 357        return find_extent_buffer(&fs_info->extent_cache,
 358                                  bytenr, blocksize);
 359}
 360
 361struct extent_buffer* btrfs_find_create_tree_block(
 362                struct btrfs_fs_info *fs_info, u64 bytenr)
 363{
 364        return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
 365}
 366
 367static int verify_parent_transid(struct extent_io_tree *io_tree,
 368                                 struct extent_buffer *eb, u64 parent_transid,
 369                                 int ignore)
 370{
 371        int ret;
 372
 373        if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
 374                return 0;
 375
 376        if (extent_buffer_uptodate(eb) &&
 377            btrfs_header_generation(eb) == parent_transid) {
 378                ret = 0;
 379                goto out;
 380        }
 381        printk("parent transid verify failed on %llu wanted %llu found %llu\n",
 382               (unsigned long long)eb->start,
 383               (unsigned long long)parent_transid,
 384               (unsigned long long)btrfs_header_generation(eb));
 385        if (ignore) {
 386                eb->flags |= EXTENT_BAD_TRANSID;
 387                printk("Ignoring transid failure\n");
 388                return 0;
 389        }
 390
 391        ret = 1;
 392out:
 393        clear_extent_buffer_uptodate(eb);
 394        return ret;
 395
 396}
 397
 398int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
 399{
 400        unsigned long offset = 0;
 401        struct btrfs_multi_bio *multi = NULL;
 402        struct btrfs_device *device;
 403        int ret = 0;
 404        u64 read_len;
 405        unsigned long bytes_left = eb->len;
 406
 407        while (bytes_left) {
 408                read_len = bytes_left;
 409                device = NULL;
 410
 411                ret = btrfs_map_block(info, READ, eb->start + offset,
 412                                      &read_len, &multi, mirror, NULL);
 413                if (ret) {
 414                        printk("Couldn't map the block %Lu\n", eb->start + offset);
 415                        kfree(multi);
 416                        return -EIO;
 417                }
 418                device = multi->stripes[0].dev;
 419
 420                if (!device->desc || !device->part) {
 421                        kfree(multi);
 422                        return -EIO;
 423                }
 424
 425                if (read_len > bytes_left)
 426                        read_len = bytes_left;
 427
 428                ret = read_extent_from_disk(device->desc, device->part,
 429                                            multi->stripes[0].physical, eb,
 430                                            offset, read_len);
 431                kfree(multi);
 432                multi = NULL;
 433
 434                if (ret)
 435                        return -EIO;
 436                offset += read_len;
 437                bytes_left -= read_len;
 438        }
 439        return 0;
 440}
 441
 442struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
 443                u64 parent_transid)
 444{
 445        int ret;
 446        struct extent_buffer *eb;
 447        u64 best_transid = 0;
 448        u32 sectorsize = fs_info->sectorsize;
 449        int mirror_num = 1;
 450        int good_mirror = 0;
 451        int candidate_mirror = 0;
 452        int num_copies;
 453        int ignore = 0;
 454
 455        /*
 456         * Don't even try to create tree block for unaligned tree block
 457         * bytenr.
 458         * Such unaligned tree block will free overlapping extent buffer,
 459         * causing use-after-free bugs for fuzzed images.
 460         */
 461        if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
 462                error("tree block bytenr %llu is not aligned to sectorsize %u",
 463                      bytenr, sectorsize);
 464                return ERR_PTR(-EIO);
 465        }
 466
 467        eb = btrfs_find_create_tree_block(fs_info, bytenr);
 468        if (!eb)
 469                return ERR_PTR(-ENOMEM);
 470
 471        if (btrfs_buffer_uptodate(eb, parent_transid))
 472                return eb;
 473
 474        num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
 475        while (1) {
 476                ret = read_whole_eb(fs_info, eb, mirror_num);
 477                if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
 478                    check_tree_block(fs_info, eb) == 0 &&
 479                    verify_parent_transid(&fs_info->extent_cache, eb,
 480                                          parent_transid, ignore) == 0) {
 481                        /*
 482                         * check_tree_block() is less strict to allow btrfs
 483                         * check to get raw eb with bad key order and fix it.
 484                         * But we still need to try to get a good copy if
 485                         * possible, or bad key order can go into tools like
 486                         * btrfs ins dump-tree.
 487                         */
 488                        if (btrfs_header_level(eb))
 489                                ret = btrfs_check_node(fs_info, NULL, eb);
 490                        else
 491                                ret = btrfs_check_leaf(fs_info, NULL, eb);
 492                        if (!ret || candidate_mirror == mirror_num) {
 493                                btrfs_set_buffer_uptodate(eb);
 494                                return eb;
 495                        }
 496                        if (candidate_mirror <= 0)
 497                                candidate_mirror = mirror_num;
 498                }
 499                if (ignore) {
 500                        if (candidate_mirror > 0) {
 501                                mirror_num = candidate_mirror;
 502                                continue;
 503                        }
 504                        if (check_tree_block(fs_info, eb))
 505                                print_tree_block_error(fs_info, eb,
 506                                                check_tree_block(fs_info, eb));
 507                        else
 508                                fprintf(stderr, "Csum didn't match\n");
 509                        ret = -EIO;
 510                        break;
 511                }
 512                if (num_copies == 1) {
 513                        ignore = 1;
 514                        continue;
 515                }
 516                if (btrfs_header_generation(eb) > best_transid) {
 517                        best_transid = btrfs_header_generation(eb);
 518                        good_mirror = mirror_num;
 519                }
 520                mirror_num++;
 521                if (mirror_num > num_copies) {
 522                        if (candidate_mirror > 0)
 523                                mirror_num = candidate_mirror;
 524                        else
 525                                mirror_num = good_mirror;
 526                        ignore = 1;
 527                        continue;
 528                }
 529        }
 530        /*
 531         * We failed to read this tree block, it be should deleted right now
 532         * to avoid stale cache populate the cache.
 533         */
 534        free_extent_buffer(eb);
 535        return ERR_PTR(ret);
 536}
 537
 538int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
 539                     u64 *len, int mirror)
 540{
 541        u64 offset = 0;
 542        struct btrfs_multi_bio *multi = NULL;
 543        struct btrfs_device *device;
 544        int ret = 0;
 545        u64 max_len = *len;
 546
 547        ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror,
 548                              NULL);
 549        if (ret) {
 550                fprintf(stderr, "Couldn't map the block %llu\n",
 551                                logical + offset);
 552                goto err;
 553        }
 554        device = multi->stripes[0].dev;
 555
 556        if (*len > max_len)
 557                *len = max_len;
 558        if (!device->desc || !device->part) {
 559                ret = -EIO;
 560                goto err;
 561        }
 562
 563        ret = __btrfs_devread(device->desc, device->part, data, *len,
 564                              multi->stripes[0].physical);
 565        if (ret != *len)
 566                ret = -EIO;
 567        else
 568                ret = 0;
 569err:
 570        kfree(multi);
 571        return ret;
 572}
 573
 574void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 575                      u64 objectid)
 576{
 577        root->node = NULL;
 578        root->track_dirty = 0;
 579
 580        root->fs_info = fs_info;
 581        root->objectid = objectid;
 582        root->last_trans = 0;
 583        root->last_inode_alloc = 0;
 584
 585        memset(&root->root_key, 0, sizeof(root->root_key));
 586        memset(&root->root_item, 0, sizeof(root->root_item));
 587        root->root_key.objectid = objectid;
 588}
 589
 590static int find_and_setup_root(struct btrfs_root *tree_root,
 591                               struct btrfs_fs_info *fs_info,
 592                               u64 objectid, struct btrfs_root *root)
 593{
 594        int ret;
 595        u64 generation;
 596
 597        btrfs_setup_root(root, fs_info, objectid);
 598        ret = btrfs_find_last_root(tree_root, objectid,
 599                                   &root->root_item, &root->root_key);
 600        if (ret)
 601                return ret;
 602
 603        generation = btrfs_root_generation(&root->root_item);
 604        root->node = read_tree_block(fs_info,
 605                        btrfs_root_bytenr(&root->root_item), generation);
 606        if (!extent_buffer_uptodate(root->node))
 607                return -EIO;
 608
 609        return 0;
 610}
 611
 612int btrfs_free_fs_root(struct btrfs_root *root)
 613{
 614        if (root->node)
 615                free_extent_buffer(root->node);
 616        kfree(root);
 617        return 0;
 618}
 619
 620static void __free_fs_root(struct rb_node *node)
 621{
 622        struct btrfs_root *root;
 623
 624        root = container_of(node, struct btrfs_root, rb_node);
 625        btrfs_free_fs_root(root);
 626}
 627
 628FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
 629
 630struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
 631                                               struct btrfs_key *location)
 632{
 633        struct btrfs_root *root;
 634        struct btrfs_root *tree_root = fs_info->tree_root;
 635        struct btrfs_path *path;
 636        struct extent_buffer *l;
 637        u64 generation;
 638        int ret = 0;
 639
 640        root = calloc(1, sizeof(*root));
 641        if (!root)
 642                return ERR_PTR(-ENOMEM);
 643        if (location->offset == (u64)-1) {
 644                ret = find_and_setup_root(tree_root, fs_info,
 645                                          location->objectid, root);
 646                if (ret) {
 647                        free(root);
 648                        return ERR_PTR(ret);
 649                }
 650                goto insert;
 651        }
 652
 653        btrfs_setup_root(root, fs_info,
 654                         location->objectid);
 655
 656        path = btrfs_alloc_path();
 657        if (!path) {
 658                free(root);
 659                return ERR_PTR(-ENOMEM);
 660        }
 661
 662        ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
 663        if (ret != 0) {
 664                if (ret > 0)
 665                        ret = -ENOENT;
 666                goto out;
 667        }
 668        l = path->nodes[0];
 669        read_extent_buffer(l, &root->root_item,
 670               btrfs_item_ptr_offset(l, path->slots[0]),
 671               sizeof(root->root_item));
 672        memcpy(&root->root_key, location, sizeof(*location));
 673
 674        /* If this root is already an orphan, no need to read */
 675        if (btrfs_root_refs(&root->root_item) == 0) {
 676                ret = -ENOENT;
 677                goto out;
 678        }
 679        ret = 0;
 680out:
 681        btrfs_free_path(path);
 682        if (ret) {
 683                free(root);
 684                return ERR_PTR(ret);
 685        }
 686        generation = btrfs_root_generation(&root->root_item);
 687        root->node = read_tree_block(fs_info,
 688                        btrfs_root_bytenr(&root->root_item), generation);
 689        if (!extent_buffer_uptodate(root->node)) {
 690                free(root);
 691                return ERR_PTR(-EIO);
 692        }
 693insert:
 694        root->ref_cows = 1;
 695        return root;
 696}
 697
 698static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
 699                                            void *data)
 700{
 701        u64 objectid = *((u64 *)data);
 702        struct btrfs_root *root;
 703
 704        root = rb_entry(node, struct btrfs_root, rb_node);
 705        if (objectid > root->objectid)
 706                return 1;
 707        else if (objectid < root->objectid)
 708                return -1;
 709        else
 710                return 0;
 711}
 712
 713int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
 714{
 715        struct btrfs_root *root;
 716
 717        root = rb_entry(node2, struct btrfs_root, rb_node);
 718        return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
 719}
 720
 721struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
 722                                      struct btrfs_key *location)
 723{
 724        struct btrfs_root *root;
 725        struct rb_node *node;
 726        int ret;
 727        u64 objectid = location->objectid;
 728
 729        if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
 730                return fs_info->tree_root;
 731        if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
 732                return fs_info->chunk_root;
 733        if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
 734                return fs_info->csum_root;
 735        BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID);
 736
 737        node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
 738                         btrfs_fs_roots_compare_objectids, NULL);
 739        if (node)
 740                return container_of(node, struct btrfs_root, rb_node);
 741
 742        root = btrfs_read_fs_root_no_cache(fs_info, location);
 743        if (IS_ERR(root))
 744                return root;
 745
 746        ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
 747                        btrfs_fs_roots_compare_roots);
 748        BUG_ON(ret);
 749        return root;
 750}
 751
 752void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 753{
 754        free(fs_info->tree_root);
 755        free(fs_info->chunk_root);
 756        free(fs_info->csum_root);
 757        free(fs_info->super_copy);
 758        free(fs_info);
 759}
 760
 761struct btrfs_fs_info *btrfs_new_fs_info(void)
 762{
 763        struct btrfs_fs_info *fs_info;
 764
 765        fs_info = calloc(1, sizeof(struct btrfs_fs_info));
 766        if (!fs_info)
 767                return NULL;
 768
 769        fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
 770        fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
 771        fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
 772        fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
 773
 774        if (!fs_info->tree_root || !fs_info->chunk_root ||
 775            !fs_info->csum_root || !fs_info->super_copy)
 776                goto free_all;
 777
 778        extent_io_tree_init(&fs_info->extent_cache);
 779
 780        fs_info->fs_root_tree = RB_ROOT;
 781        cache_tree_init(&fs_info->mapping_tree.cache_tree);
 782
 783        mutex_init(&fs_info->fs_mutex);
 784
 785        return fs_info;
 786free_all:
 787        btrfs_free_fs_info(fs_info);
 788        return NULL;
 789}
 790
 791static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
 792                                      struct btrfs_root *info_root,
 793                                      u64 objectid, char *str)
 794{
 795        struct btrfs_root *root = fs_info->tree_root;
 796        int ret;
 797
 798        ret = find_and_setup_root(root, fs_info, objectid, info_root);
 799        if (ret) {
 800                error("could not setup %s tree", str);
 801                return -EIO;
 802        }
 803
 804        return 0;
 805}
 806
 807static int get_default_subvolume(struct btrfs_fs_info *fs_info,
 808                                 struct btrfs_key *key_ret)
 809{
 810        struct btrfs_root *root = fs_info->tree_root;
 811        struct btrfs_dir_item *dir_item;
 812        struct btrfs_path path;
 813        int ret = 0;
 814
 815        btrfs_init_path(&path);
 816
 817        dir_item = btrfs_lookup_dir_item(NULL, root, &path,
 818                                         BTRFS_ROOT_TREE_DIR_OBJECTID,
 819                                         "default", 7, 0);
 820        if (IS_ERR(dir_item)) {
 821                ret = PTR_ERR(dir_item);
 822                goto out;
 823        }
 824
 825        btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
 826out:
 827        btrfs_release_path(&path);
 828        return ret;
 829}
 830
 831int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
 832{
 833        struct btrfs_super_block *sb = fs_info->super_copy;
 834        struct btrfs_root *root;
 835        struct btrfs_key key;
 836        u64 root_tree_bytenr;
 837        u64 generation;
 838        int ret;
 839
 840        root = fs_info->tree_root;
 841        btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
 842        generation = btrfs_super_generation(sb);
 843
 844        root_tree_bytenr = btrfs_super_root(sb);
 845
 846        root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
 847        if (!extent_buffer_uptodate(root->node)) {
 848                fprintf(stderr, "Couldn't read tree root\n");
 849                return -EIO;
 850        }
 851
 852        ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
 853                                         BTRFS_CSUM_TREE_OBJECTID, "csum");
 854        if (ret)
 855                return ret;
 856        fs_info->csum_root->track_dirty = 1;
 857
 858        fs_info->last_trans_committed = generation;
 859
 860        ret = get_default_subvolume(fs_info, &key);
 861        if (ret) {
 862                /*
 863                 * The default dir item isn't there. Linux kernel behaviour is
 864                 * to silently use the top-level subvolume in this case.
 865                 */
 866                key.objectid = BTRFS_FS_TREE_OBJECTID;
 867                key.type = BTRFS_ROOT_ITEM_KEY;
 868                key.offset = (u64)-1;
 869        }
 870
 871        fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
 872
 873        if (IS_ERR(fs_info->fs_root))
 874                return -EIO;
 875        return 0;
 876}
 877
 878void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
 879{
 880        if (fs_info->csum_root)
 881                free_extent_buffer(fs_info->csum_root->node);
 882        if (fs_info->tree_root)
 883                free_extent_buffer(fs_info->tree_root->node);
 884        if (fs_info->chunk_root)
 885                free_extent_buffer(fs_info->chunk_root->node);
 886}
 887
 888static void free_map_lookup(struct cache_extent *ce)
 889{
 890        struct map_lookup *map;
 891
 892        map = container_of(ce, struct map_lookup, ce);
 893        kfree(map);
 894}
 895
 896FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
 897
 898void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
 899{
 900        free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
 901        extent_io_tree_cleanup(&fs_info->extent_cache);
 902}
 903
 904static int btrfs_scan_fs_devices(struct blk_desc *desc,
 905                                 struct disk_partition *part,
 906                                 struct btrfs_fs_devices **fs_devices)
 907{
 908        u64 total_devs;
 909        int ret;
 910
 911        if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
 912                     desc->blksz) > (part->size << desc->log2blksz)) {
 913                error("superblock end %u is larger than device size " LBAFU,
 914                                BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
 915                                part->size << desc->log2blksz);
 916                return -EINVAL;
 917        }
 918
 919        ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
 920        if (ret) {
 921                /*
 922                 * Avoid showing this when probing for a possible Btrfs
 923                 *
 924                 * fprintf(stderr, "No valid Btrfs found\n");
 925                 */
 926                return ret;
 927        }
 928        return 0;
 929}
 930
 931int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
 932{
 933        u64 features;
 934
 935        features = btrfs_super_incompat_flags(sb) &
 936                   ~BTRFS_FEATURE_INCOMPAT_SUPP;
 937        if (features) {
 938                printk("couldn't open because of unsupported "
 939                       "option features (%llx).\n",
 940                       (unsigned long long)features);
 941                return -ENOTSUPP;
 942        }
 943
 944        features = btrfs_super_incompat_flags(sb);
 945        if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
 946                features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
 947                btrfs_set_super_incompat_flags(sb, features);
 948        }
 949
 950        return 0;
 951}
 952
 953static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
 954{
 955        struct btrfs_super_block *sb = fs_info->super_copy;
 956        u64 chunk_root_bytenr;
 957        u64 generation;
 958        int ret;
 959
 960        btrfs_setup_root(fs_info->chunk_root, fs_info,
 961                        BTRFS_CHUNK_TREE_OBJECTID);
 962
 963        ret = btrfs_read_sys_array(fs_info);
 964        if (ret)
 965                return ret;
 966
 967        generation = btrfs_super_chunk_root_generation(sb);
 968        chunk_root_bytenr = btrfs_super_chunk_root(sb);
 969
 970        fs_info->chunk_root->node = read_tree_block(fs_info,
 971                                                    chunk_root_bytenr,
 972                                                    generation);
 973        if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
 974                error("cannot read chunk root");
 975                return -EIO;
 976        }
 977
 978        ret = btrfs_read_chunk_tree(fs_info);
 979        if (ret) {
 980                fprintf(stderr, "Couldn't read chunk tree\n");
 981                return ret;
 982        }
 983        return 0;
 984}
 985
 986struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
 987                                         struct disk_partition *part)
 988{
 989        struct btrfs_fs_info *fs_info;
 990        struct btrfs_super_block *disk_super;
 991        struct btrfs_fs_devices *fs_devices = NULL;
 992        struct extent_buffer *eb;
 993        int ret;
 994
 995        fs_info = btrfs_new_fs_info();
 996        if (!fs_info) {
 997                fprintf(stderr, "Failed to allocate memory for fs_info\n");
 998                return NULL;
 999        }
1000
1001        ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
1002        if (ret)
1003                goto out;
1004
1005        fs_info->fs_devices = fs_devices;
1006
1007        ret = btrfs_open_devices(fs_devices);
1008        if (ret)
1009                goto out;
1010
1011        disk_super = fs_info->super_copy;
1012        ret = btrfs_read_dev_super(desc, part, disk_super);
1013        if (ret) {
1014                debug("No valid btrfs found\n");
1015                goto out_devices;
1016        }
1017
1018        if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
1019                fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
1020                goto out_devices;
1021        }
1022
1023        ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
1024        if (btrfs_fs_incompat(fs_info, METADATA_UUID))
1025                ASSERT(!memcmp(disk_super->metadata_uuid,
1026                               fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
1027
1028        fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
1029        fs_info->nodesize = btrfs_super_nodesize(disk_super);
1030        fs_info->stripesize = btrfs_super_stripesize(disk_super);
1031
1032        ret = btrfs_check_fs_compatibility(fs_info->super_copy);
1033        if (ret)
1034                goto out_devices;
1035
1036        ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
1037        if (ret)
1038                goto out_chunk;
1039
1040        /* Chunk tree root is unable to read, return directly */
1041        if (!fs_info->chunk_root)
1042                return fs_info;
1043
1044        eb = fs_info->chunk_root->node;
1045        read_extent_buffer(eb, fs_info->chunk_tree_uuid,
1046                           btrfs_header_chunk_tree_uuid(eb),
1047                           BTRFS_UUID_SIZE);
1048
1049        ret = btrfs_setup_all_roots(fs_info);
1050        if (ret)
1051                goto out_chunk;
1052
1053        return fs_info;
1054
1055out_chunk:
1056        btrfs_release_all_roots(fs_info);
1057        btrfs_cleanup_all_caches(fs_info);
1058out_devices:
1059        btrfs_close_devices(fs_devices);
1060out:
1061        btrfs_free_fs_info(fs_info);
1062        return NULL;
1063}
1064
1065int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
1066{
1067        int ret;
1068
1069        free_fs_roots_tree(&fs_info->fs_root_tree);
1070
1071        btrfs_release_all_roots(fs_info);
1072        ret = btrfs_close_devices(fs_info->fs_devices);
1073        btrfs_cleanup_all_caches(fs_info);
1074        btrfs_free_fs_info(fs_info);
1075        return ret;
1076}
1077
1078int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
1079{
1080        int ret;
1081
1082        ret = extent_buffer_uptodate(buf);
1083        if (!ret)
1084                return ret;
1085
1086        ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
1087                                    parent_transid, 1);
1088        return !ret;
1089}
1090
1091int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
1092{
1093        return set_extent_buffer_uptodate(eb);
1094}
1095