uboot/fs/btrfs/volumes.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2#include <stdlib.h>
   3#include <common.h>
   4#include <fs_internal.h>
   5#include "ctree.h"
   6#include "disk-io.h"
   7#include "volumes.h"
   8#include "extent-io.h"
   9
  10const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
  11        [BTRFS_RAID_RAID10] = {
  12                .sub_stripes    = 2,
  13                .dev_stripes    = 1,
  14                .devs_max       = 0,    /* 0 == as many as possible */
  15                .devs_min       = 4,
  16                .tolerated_failures = 1,
  17                .devs_increment = 2,
  18                .ncopies        = 2,
  19                .nparity        = 0,
  20                .raid_name      = "raid10",
  21                .bg_flag        = BTRFS_BLOCK_GROUP_RAID10,
  22        },
  23        [BTRFS_RAID_RAID1] = {
  24                .sub_stripes    = 1,
  25                .dev_stripes    = 1,
  26                .devs_max       = 2,
  27                .devs_min       = 2,
  28                .tolerated_failures = 1,
  29                .devs_increment = 2,
  30                .ncopies        = 2,
  31                .nparity        = 0,
  32                .raid_name      = "raid1",
  33                .bg_flag        = BTRFS_BLOCK_GROUP_RAID1,
  34        },
  35        [BTRFS_RAID_RAID1C3] = {
  36                .sub_stripes    = 1,
  37                .dev_stripes    = 1,
  38                .devs_max       = 3,
  39                .devs_min       = 3,
  40                .tolerated_failures = 2,
  41                .devs_increment = 3,
  42                .ncopies        = 3,
  43                .raid_name      = "raid1c3",
  44                .bg_flag        = BTRFS_BLOCK_GROUP_RAID1C3,
  45        },
  46        [BTRFS_RAID_RAID1C4] = {
  47                .sub_stripes    = 1,
  48                .dev_stripes    = 1,
  49                .devs_max       = 4,
  50                .devs_min       = 4,
  51                .tolerated_failures = 3,
  52                .devs_increment = 4,
  53                .ncopies        = 4,
  54                .raid_name      = "raid1c4",
  55                .bg_flag        = BTRFS_BLOCK_GROUP_RAID1C4,
  56        },
  57        [BTRFS_RAID_DUP] = {
  58                .sub_stripes    = 1,
  59                .dev_stripes    = 2,
  60                .devs_max       = 1,
  61                .devs_min       = 1,
  62                .tolerated_failures = 0,
  63                .devs_increment = 1,
  64                .ncopies        = 2,
  65                .nparity        = 0,
  66                .raid_name      = "dup",
  67                .bg_flag        = BTRFS_BLOCK_GROUP_DUP,
  68        },
  69        [BTRFS_RAID_RAID0] = {
  70                .sub_stripes    = 1,
  71                .dev_stripes    = 1,
  72                .devs_max       = 0,
  73                .devs_min       = 2,
  74                .tolerated_failures = 0,
  75                .devs_increment = 1,
  76                .ncopies        = 1,
  77                .nparity        = 0,
  78                .raid_name      = "raid0",
  79                .bg_flag        = BTRFS_BLOCK_GROUP_RAID0,
  80        },
  81        [BTRFS_RAID_SINGLE] = {
  82                .sub_stripes    = 1,
  83                .dev_stripes    = 1,
  84                .devs_max       = 1,
  85                .devs_min       = 1,
  86                .tolerated_failures = 0,
  87                .devs_increment = 1,
  88                .ncopies        = 1,
  89                .nparity        = 0,
  90                .raid_name      = "single",
  91                .bg_flag        = 0,
  92        },
  93        [BTRFS_RAID_RAID5] = {
  94                .sub_stripes    = 1,
  95                .dev_stripes    = 1,
  96                .devs_max       = 0,
  97                .devs_min       = 2,
  98                .tolerated_failures = 1,
  99                .devs_increment = 1,
 100                .ncopies        = 1,
 101                .nparity        = 1,
 102                .raid_name      = "raid5",
 103                .bg_flag        = BTRFS_BLOCK_GROUP_RAID5,
 104        },
 105        [BTRFS_RAID_RAID6] = {
 106                .sub_stripes    = 1,
 107                .dev_stripes    = 1,
 108                .devs_max       = 0,
 109                .devs_min       = 3,
 110                .tolerated_failures = 2,
 111                .devs_increment = 1,
 112                .ncopies        = 1,
 113                .nparity        = 2,
 114                .raid_name      = "raid6",
 115                .bg_flag        = BTRFS_BLOCK_GROUP_RAID6,
 116        },
 117};
 118
 119struct stripe {
 120        struct btrfs_device *dev;
 121        u64 physical;
 122};
 123
 124static inline int nr_parity_stripes(struct map_lookup *map)
 125{
 126        if (map->type & BTRFS_BLOCK_GROUP_RAID5)
 127                return 1;
 128        else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
 129                return 2;
 130        else
 131                return 0;
 132}
 133
 134static inline int nr_data_stripes(struct map_lookup *map)
 135{
 136        return map->num_stripes - nr_parity_stripes(map);
 137}
 138
 139#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) )
 140
 141static LIST_HEAD(fs_uuids);
 142
 143/*
 144 * Find a device specified by @devid or @uuid in the list of @fs_devices, or
 145 * return NULL.
 146 *
 147 * If devid and uuid are both specified, the match must be exact, otherwise
 148 * only devid is used.
 149 */
 150static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
 151                u64 devid, u8 *uuid)
 152{
 153        struct list_head *head = &fs_devices->devices;
 154        struct btrfs_device *dev;
 155
 156        list_for_each_entry(dev, head, dev_list) {
 157                if (dev->devid == devid &&
 158                    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
 159                        return dev;
 160                }
 161        }
 162        return NULL;
 163}
 164
 165static struct btrfs_fs_devices *find_fsid(u8 *fsid, u8 *metadata_uuid)
 166{
 167        struct btrfs_fs_devices *fs_devices;
 168
 169        list_for_each_entry(fs_devices, &fs_uuids, list) {
 170                if (metadata_uuid && (memcmp(fsid, fs_devices->fsid,
 171                                             BTRFS_FSID_SIZE) == 0) &&
 172                    (memcmp(metadata_uuid, fs_devices->metadata_uuid,
 173                            BTRFS_FSID_SIZE) == 0)) {
 174                        return fs_devices;
 175                } else if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0){
 176                        return fs_devices;
 177                }
 178        }
 179        return NULL;
 180}
 181
 182static int device_list_add(struct btrfs_super_block *disk_super,
 183                           u64 devid, struct blk_desc *desc,
 184                           struct disk_partition *part,
 185                           struct btrfs_fs_devices **fs_devices_ret)
 186{
 187        struct btrfs_device *device;
 188        struct btrfs_fs_devices *fs_devices;
 189        u64 found_transid = btrfs_super_generation(disk_super);
 190        bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
 191                BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
 192
 193        if (metadata_uuid)
 194                fs_devices = find_fsid(disk_super->fsid,
 195                                       disk_super->metadata_uuid);
 196        else
 197                fs_devices = find_fsid(disk_super->fsid, NULL);
 198
 199        if (!fs_devices) {
 200                fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
 201                if (!fs_devices)
 202                        return -ENOMEM;
 203                INIT_LIST_HEAD(&fs_devices->devices);
 204                list_add(&fs_devices->list, &fs_uuids);
 205                memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
 206                if (metadata_uuid)
 207                        memcpy(fs_devices->metadata_uuid,
 208                               disk_super->metadata_uuid, BTRFS_FSID_SIZE);
 209                else
 210                        memcpy(fs_devices->metadata_uuid, fs_devices->fsid,
 211                               BTRFS_FSID_SIZE);
 212
 213                fs_devices->latest_devid = devid;
 214                fs_devices->latest_trans = found_transid;
 215                fs_devices->lowest_devid = (u64)-1;
 216                device = NULL;
 217        } else {
 218                device = find_device(fs_devices, devid,
 219                                    disk_super->dev_item.uuid);
 220        }
 221        if (!device) {
 222                device = kzalloc(sizeof(*device), GFP_NOFS);
 223                if (!device) {
 224                        /* we can safely leave the fs_devices entry around */
 225                        return -ENOMEM;
 226                }
 227                device->devid = devid;
 228                device->desc = desc;
 229                device->part = part;
 230                device->generation = found_transid;
 231                memcpy(device->uuid, disk_super->dev_item.uuid,
 232                       BTRFS_UUID_SIZE);
 233                device->total_devs = btrfs_super_num_devices(disk_super);
 234                device->super_bytes_used = btrfs_super_bytes_used(disk_super);
 235                device->total_bytes =
 236                        btrfs_stack_device_total_bytes(&disk_super->dev_item);
 237                device->bytes_used =
 238                        btrfs_stack_device_bytes_used(&disk_super->dev_item);
 239                list_add(&device->dev_list, &fs_devices->devices);
 240                device->fs_devices = fs_devices;
 241        } else if (!device->desc || !device->part) {
 242                /*
 243                 * The existing device has newer generation, so this one could
 244                 * be a stale one, don't add it.
 245                 */
 246                if (found_transid < device->generation) {
 247                        error(
 248        "adding devid %llu gen %llu but found an existing device gen %llu",
 249                                device->devid, found_transid,
 250                                device->generation);
 251                        return -EEXIST;
 252                } else {
 253                        device->desc = desc;
 254                        device->part = part;
 255                }
 256        }
 257
 258
 259        if (found_transid > fs_devices->latest_trans) {
 260                fs_devices->latest_devid = devid;
 261                fs_devices->latest_trans = found_transid;
 262        }
 263        if (fs_devices->lowest_devid > devid) {
 264                fs_devices->lowest_devid = devid;
 265        }
 266        *fs_devices_ret = fs_devices;
 267        return 0;
 268}
 269
 270int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 271{
 272        struct btrfs_fs_devices *seed_devices;
 273        struct btrfs_device *device;
 274        int ret = 0;
 275
 276again:
 277        if (!fs_devices)
 278                return 0;
 279        while (!list_empty(&fs_devices->devices)) {
 280                device = list_entry(fs_devices->devices.next,
 281                                    struct btrfs_device, dev_list);
 282                list_del(&device->dev_list);
 283                /* free the memory */
 284                free(device);
 285        }
 286
 287        seed_devices = fs_devices->seed;
 288        fs_devices->seed = NULL;
 289        if (seed_devices) {
 290                struct btrfs_fs_devices *orig;
 291
 292                orig = fs_devices;
 293                fs_devices = seed_devices;
 294                list_del(&orig->list);
 295                free(orig);
 296                goto again;
 297        } else {
 298                list_del(&fs_devices->list);
 299                free(fs_devices);
 300        }
 301
 302        return ret;
 303}
 304
 305void btrfs_close_all_devices(void)
 306{
 307        struct btrfs_fs_devices *fs_devices;
 308
 309        while (!list_empty(&fs_uuids)) {
 310                fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices,
 311                                        list);
 312                btrfs_close_devices(fs_devices);
 313        }
 314}
 315
 316int btrfs_open_devices(struct btrfs_fs_devices *fs_devices)
 317{
 318        struct btrfs_device *device;
 319
 320        list_for_each_entry(device, &fs_devices->devices, dev_list) {
 321                if (!device->desc || !device->part) {
 322                        printf("no device found for devid %llu, skip it \n",
 323                                device->devid);
 324                        continue;
 325                }
 326        }
 327        return 0;
 328}
 329
 330int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
 331                          struct btrfs_fs_devices **fs_devices_ret,
 332                          u64 *total_devs)
 333{
 334        struct btrfs_super_block *disk_super;
 335        char buf[BTRFS_SUPER_INFO_SIZE];
 336        int ret;
 337        u64 devid;
 338
 339        disk_super = (struct btrfs_super_block *)buf;
 340        ret = btrfs_read_dev_super(desc, part, disk_super);
 341        if (ret < 0)
 342                return -EIO;
 343        devid = btrfs_stack_device_id(&disk_super->dev_item);
 344        if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)
 345                *total_devs = 1;
 346        else
 347                *total_devs = btrfs_super_num_devices(disk_super);
 348
 349        ret = device_list_add(disk_super, devid, desc, part, fs_devices_ret);
 350
 351        return ret;
 352}
 353
 354struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
 355                                       u8 *uuid, u8 *fsid)
 356{
 357        struct btrfs_device *device;
 358        struct btrfs_fs_devices *cur_devices;
 359
 360        cur_devices = fs_info->fs_devices;
 361        while (cur_devices) {
 362                if (!fsid ||
 363                   !memcmp(cur_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
 364                        device = find_device(cur_devices, devid, uuid);
 365                        if (device)
 366                                return device;
 367                }
 368                cur_devices = cur_devices->seed;
 369        }
 370        return NULL;
 371}
 372
 373static struct btrfs_device *fill_missing_device(u64 devid)
 374{
 375        struct btrfs_device *device;
 376
 377        device = kzalloc(sizeof(*device), GFP_NOFS);
 378        return device;
 379}
 380
 381/*
 382 * slot == -1: SYSTEM chunk
 383 * return -EIO on error, otherwise return 0
 384 */
 385int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
 386                            struct extent_buffer *leaf,
 387                            struct btrfs_chunk *chunk,
 388                            int slot, u64 logical)
 389{
 390        u64 length;
 391        u64 stripe_len;
 392        u16 num_stripes;
 393        u16 sub_stripes;
 394        u64 type;
 395        u32 chunk_ondisk_size;
 396        u32 sectorsize = fs_info->sectorsize;
 397
 398        /*
 399         * Basic chunk item size check.  Note that btrfs_chunk already contains
 400         * one stripe, so no "==" check.
 401         */
 402        if (slot >= 0 &&
 403            btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) {
 404                error("invalid chunk item size, have %u expect [%zu, %zu)",
 405                        btrfs_item_size_nr(leaf, slot),
 406                        sizeof(struct btrfs_chunk),
 407                        BTRFS_LEAF_DATA_SIZE(fs_info));
 408                return -EUCLEAN;
 409        }
 410        length = btrfs_chunk_length(leaf, chunk);
 411        stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 412        num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
 413        sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
 414        type = btrfs_chunk_type(leaf, chunk);
 415
 416        if (num_stripes == 0) {
 417                error("invalid num_stripes, have %u expect non-zero",
 418                        num_stripes);
 419                return -EUCLEAN;
 420        }
 421        if (slot >= 0 && btrfs_chunk_item_size(num_stripes) !=
 422            btrfs_item_size_nr(leaf, slot)) {
 423                error("invalid chunk item size, have %u expect %lu",
 424                        btrfs_item_size_nr(leaf, slot),
 425                        btrfs_chunk_item_size(num_stripes));
 426                return -EUCLEAN;
 427        }
 428
 429        /*
 430         * These valid checks may be insufficient to cover every corner cases.
 431         */
 432        if (!IS_ALIGNED(logical, sectorsize)) {
 433                error("invalid chunk logical %llu",  logical);
 434                return -EIO;
 435        }
 436        if (btrfs_chunk_sector_size(leaf, chunk) != sectorsize) {
 437                error("invalid chunk sectorsize %llu",
 438                      (unsigned long long)btrfs_chunk_sector_size(leaf, chunk));
 439                return -EIO;
 440        }
 441        if (!length || !IS_ALIGNED(length, sectorsize)) {
 442                error("invalid chunk length %llu",  length);
 443                return -EIO;
 444        }
 445        if (stripe_len != BTRFS_STRIPE_LEN) {
 446                error("invalid chunk stripe length: %llu", stripe_len);
 447                return -EIO;
 448        }
 449        /* Check on chunk item type */
 450        if (slot == -1 && (type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
 451                error("invalid chunk type %llu", type);
 452                return -EIO;
 453        }
 454        if (type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
 455                     BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
 456                error("unrecognized chunk type: %llu",
 457                      ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
 458                        BTRFS_BLOCK_GROUP_PROFILE_MASK) & type);
 459                return -EIO;
 460        }
 461        if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
 462                error("missing chunk type flag: %llu", type);
 463                return -EIO;
 464        }
 465        if (!(is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) ||
 466              (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) {
 467                error("conflicting chunk type detected: %llu", type);
 468                return -EIO;
 469        }
 470        if ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
 471            !is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
 472                error("conflicting chunk profile detected: %llu", type);
 473                return -EIO;
 474        }
 475
 476        chunk_ondisk_size = btrfs_chunk_item_size(num_stripes);
 477        /*
 478         * Btrfs_chunk contains at least one stripe, and for sys_chunk
 479         * it can't exceed the system chunk array size
 480         * For normal chunk, it should match its chunk item size.
 481         */
 482        if (num_stripes < 1 ||
 483            (slot == -1 && chunk_ondisk_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) ||
 484            (slot >= 0 && chunk_ondisk_size > btrfs_item_size_nr(leaf, slot))) {
 485                error("invalid num_stripes: %u", num_stripes);
 486                return -EIO;
 487        }
 488        /*
 489         * Device number check against profile
 490         */
 491        if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != 2 ||
 492                  !IS_ALIGNED(num_stripes, sub_stripes))) ||
 493            (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
 494            (type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < 3) ||
 495            (type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < 4) ||
 496            (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
 497            (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
 498            (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
 499            ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
 500             num_stripes != 1)) {
 501                error("Invalid num_stripes:sub_stripes %u:%u for profile %llu",
 502                      num_stripes, sub_stripes,
 503                      type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
 504                return -EIO;
 505        }
 506
 507        return 0;
 508}
 509
 510/*
 511 * Slot is used to verify the chunk item is valid
 512 *
 513 * For sys chunk in superblock, pass -1 to indicate sys chunk.
 514 */
 515static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
 516                          struct extent_buffer *leaf,
 517                          struct btrfs_chunk *chunk, int slot)
 518{
 519        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 520        struct map_lookup *map;
 521        struct cache_extent *ce;
 522        u64 logical;
 523        u64 length;
 524        u64 devid;
 525        u8 uuid[BTRFS_UUID_SIZE];
 526        int num_stripes;
 527        int ret;
 528        int i;
 529
 530        logical = key->offset;
 531        length = btrfs_chunk_length(leaf, chunk);
 532        num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
 533        /* Validation check */
 534        ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, slot, logical);
 535        if (ret) {
 536                error("%s checksums match, but it has an invalid chunk, %s",
 537                      (slot == -1) ? "Superblock" : "Metadata",
 538                      (slot == -1) ? "try btrfsck --repair -s <superblock> ie, 0,1,2" : "");
 539                return ret;
 540        }
 541
 542        ce = search_cache_extent(&map_tree->cache_tree, logical);
 543
 544        /* already mapped? */
 545        if (ce && ce->start <= logical && ce->start + ce->size > logical) {
 546                return 0;
 547        }
 548
 549        map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
 550        if (!map)
 551                return -ENOMEM;
 552
 553        map->ce.start = logical;
 554        map->ce.size = length;
 555        map->num_stripes = num_stripes;
 556        map->io_width = btrfs_chunk_io_width(leaf, chunk);
 557        map->io_align = btrfs_chunk_io_align(leaf, chunk);
 558        map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
 559        map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
 560        map->type = btrfs_chunk_type(leaf, chunk);
 561        map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
 562
 563        for (i = 0; i < num_stripes; i++) {
 564                map->stripes[i].physical =
 565                        btrfs_stripe_offset_nr(leaf, chunk, i);
 566                devid = btrfs_stripe_devid_nr(leaf, chunk, i);
 567                read_extent_buffer(leaf, uuid, (unsigned long)
 568                                   btrfs_stripe_dev_uuid_nr(chunk, i),
 569                                   BTRFS_UUID_SIZE);
 570                map->stripes[i].dev = btrfs_find_device(fs_info, devid, uuid,
 571                                                        NULL);
 572                if (!map->stripes[i].dev) {
 573                        map->stripes[i].dev = fill_missing_device(devid);
 574                        printf("warning, device %llu is missing\n",
 575                               (unsigned long long)devid);
 576                        list_add(&map->stripes[i].dev->dev_list,
 577                                 &fs_info->fs_devices->devices);
 578                }
 579
 580        }
 581        ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
 582        if (ret < 0) {
 583                errno = -ret;
 584                error("failed to add chunk map start=%llu len=%llu: %d (%m)",
 585                      map->ce.start, map->ce.size, ret);
 586        }
 587
 588        return ret;
 589}
 590
 591static int fill_device_from_item(struct extent_buffer *leaf,
 592                                 struct btrfs_dev_item *dev_item,
 593                                 struct btrfs_device *device)
 594{
 595        unsigned long ptr;
 596
 597        device->devid = btrfs_device_id(leaf, dev_item);
 598        device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
 599        device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
 600        device->type = btrfs_device_type(leaf, dev_item);
 601        device->io_align = btrfs_device_io_align(leaf, dev_item);
 602        device->io_width = btrfs_device_io_width(leaf, dev_item);
 603        device->sector_size = btrfs_device_sector_size(leaf, dev_item);
 604
 605        ptr = (unsigned long)btrfs_device_uuid(dev_item);
 606        read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
 607
 608        return 0;
 609}
 610
 611static int read_one_dev(struct btrfs_fs_info *fs_info,
 612                        struct extent_buffer *leaf,
 613                        struct btrfs_dev_item *dev_item)
 614{
 615        struct btrfs_device *device;
 616        u64 devid;
 617        int ret = 0;
 618        u8 fs_uuid[BTRFS_UUID_SIZE];
 619        u8 dev_uuid[BTRFS_UUID_SIZE];
 620
 621        devid = btrfs_device_id(leaf, dev_item);
 622        read_extent_buffer(leaf, dev_uuid,
 623                           (unsigned long)btrfs_device_uuid(dev_item),
 624                           BTRFS_UUID_SIZE);
 625        read_extent_buffer(leaf, fs_uuid,
 626                           (unsigned long)btrfs_device_fsid(dev_item),
 627                           BTRFS_FSID_SIZE);
 628
 629        if (memcmp(fs_uuid, fs_info->fs_devices->fsid, BTRFS_UUID_SIZE)) {
 630                error("Seed device is not yet supported\n");
 631                return -ENOTSUPP;
 632        }
 633
 634        device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
 635        if (!device) {
 636                device = kzalloc(sizeof(*device), GFP_NOFS);
 637                if (!device)
 638                        return -ENOMEM;
 639                list_add(&device->dev_list,
 640                         &fs_info->fs_devices->devices);
 641        }
 642
 643        fill_device_from_item(leaf, dev_item, device);
 644        fs_info->fs_devices->total_rw_bytes +=
 645                btrfs_device_total_bytes(leaf, dev_item);
 646        return ret;
 647}
 648
 649int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
 650{
 651        struct btrfs_super_block *super_copy = fs_info->super_copy;
 652        struct extent_buffer *sb;
 653        struct btrfs_disk_key *disk_key;
 654        struct btrfs_chunk *chunk;
 655        u8 *array_ptr;
 656        unsigned long sb_array_offset;
 657        int ret = 0;
 658        u32 num_stripes;
 659        u32 array_size;
 660        u32 len = 0;
 661        u32 cur_offset;
 662        struct btrfs_key key;
 663
 664        if (fs_info->nodesize < BTRFS_SUPER_INFO_SIZE) {
 665                printf("ERROR: nodesize %u too small to read superblock\n",
 666                                fs_info->nodesize);
 667                return -EINVAL;
 668        }
 669        sb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET,
 670                                       BTRFS_SUPER_INFO_SIZE);
 671        if (!sb)
 672                return -ENOMEM;
 673        btrfs_set_buffer_uptodate(sb);
 674        write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy));
 675        array_size = btrfs_super_sys_array_size(super_copy);
 676
 677        array_ptr = super_copy->sys_chunk_array;
 678        sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
 679        cur_offset = 0;
 680
 681        while (cur_offset < array_size) {
 682                disk_key = (struct btrfs_disk_key *)array_ptr;
 683                len = sizeof(*disk_key);
 684                if (cur_offset + len > array_size)
 685                        goto out_short_read;
 686
 687                btrfs_disk_key_to_cpu(&key, disk_key);
 688
 689                array_ptr += len;
 690                sb_array_offset += len;
 691                cur_offset += len;
 692
 693                if (key.type == BTRFS_CHUNK_ITEM_KEY) {
 694                        chunk = (struct btrfs_chunk *)sb_array_offset;
 695                        /*
 696                         * At least one btrfs_chunk with one stripe must be
 697                         * present, exact stripe count check comes afterwards
 698                         */
 699                        len = btrfs_chunk_item_size(1);
 700                        if (cur_offset + len > array_size)
 701                                goto out_short_read;
 702
 703                        num_stripes = btrfs_chunk_num_stripes(sb, chunk);
 704                        if (!num_stripes) {
 705                                printk(
 706            "ERROR: invalid number of stripes %u in sys_array at offset %u\n",
 707                                        num_stripes, cur_offset);
 708                                ret = -EIO;
 709                                break;
 710                        }
 711
 712                        len = btrfs_chunk_item_size(num_stripes);
 713                        if (cur_offset + len > array_size)
 714                                goto out_short_read;
 715
 716                        ret = read_one_chunk(fs_info, &key, sb, chunk, -1);
 717                        if (ret)
 718                                break;
 719                } else {
 720                        printk(
 721                "ERROR: unexpected item type %u in sys_array at offset %u\n",
 722                                (u32)key.type, cur_offset);
 723                        ret = -EIO;
 724                        break;
 725                }
 726                array_ptr += len;
 727                sb_array_offset += len;
 728                cur_offset += len;
 729        }
 730        free_extent_buffer(sb);
 731        return ret;
 732
 733out_short_read:
 734        printk("ERROR: sys_array too short to read %u bytes at offset %u\n",
 735                        len, cur_offset);
 736        free_extent_buffer(sb);
 737        return -EIO;
 738}
 739
 740int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 741{
 742        struct btrfs_path *path;
 743        struct extent_buffer *leaf;
 744        struct btrfs_key key;
 745        struct btrfs_key found_key;
 746        struct btrfs_root *root = fs_info->chunk_root;
 747        int ret;
 748        int slot;
 749
 750        path = btrfs_alloc_path();
 751        if (!path)
 752                return -ENOMEM;
 753
 754        /*
 755         * Read all device items, and then all the chunk items. All
 756         * device items are found before any chunk item (their object id
 757         * is smaller than the lowest possible object id for a chunk
 758         * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
 759         */
 760        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
 761        key.offset = 0;
 762        key.type = 0;
 763        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 764        if (ret < 0)
 765                goto error;
 766        while(1) {
 767                leaf = path->nodes[0];
 768                slot = path->slots[0];
 769                if (slot >= btrfs_header_nritems(leaf)) {
 770                        ret = btrfs_next_leaf(root, path);
 771                        if (ret == 0)
 772                                continue;
 773                        if (ret < 0)
 774                                goto error;
 775                        break;
 776                }
 777                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 778                if (found_key.type == BTRFS_DEV_ITEM_KEY) {
 779                        struct btrfs_dev_item *dev_item;
 780                        dev_item = btrfs_item_ptr(leaf, slot,
 781                                                  struct btrfs_dev_item);
 782                        ret = read_one_dev(fs_info, leaf, dev_item);
 783                        if (ret < 0)
 784                                goto error;
 785                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
 786                        struct btrfs_chunk *chunk;
 787                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 788                        ret = read_one_chunk(fs_info, &found_key, leaf, chunk,
 789                                             slot);
 790                        if (ret < 0)
 791                                goto error;
 792                }
 793                path->slots[0]++;
 794        }
 795
 796        ret = 0;
 797error:
 798        btrfs_free_path(path);
 799        return ret;
 800}
 801
 802/*
 803 * Get stripe length from chunk item and its stripe items
 804 *
 805 * Caller should only call this function after validating the chunk item
 806 * by using btrfs_check_chunk_valid().
 807 */
 808u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
 809                        struct extent_buffer *leaf,
 810                        struct btrfs_chunk *chunk)
 811{
 812        u64 stripe_len;
 813        u64 chunk_len;
 814        u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
 815        u64 profile = btrfs_chunk_type(leaf, chunk) &
 816                      BTRFS_BLOCK_GROUP_PROFILE_MASK;
 817
 818        chunk_len = btrfs_chunk_length(leaf, chunk);
 819
 820        switch (profile) {
 821        case 0: /* Single profile */
 822        case BTRFS_BLOCK_GROUP_RAID1:
 823        case BTRFS_BLOCK_GROUP_RAID1C3:
 824        case BTRFS_BLOCK_GROUP_RAID1C4:
 825        case BTRFS_BLOCK_GROUP_DUP:
 826                stripe_len = chunk_len;
 827                break;
 828        case BTRFS_BLOCK_GROUP_RAID0:
 829                stripe_len = chunk_len / num_stripes;
 830                break;
 831        case BTRFS_BLOCK_GROUP_RAID5:
 832                stripe_len = chunk_len / (num_stripes - 1);
 833                break;
 834        case BTRFS_BLOCK_GROUP_RAID6:
 835                stripe_len = chunk_len / (num_stripes - 2);
 836                break;
 837        case BTRFS_BLOCK_GROUP_RAID10:
 838                stripe_len = chunk_len / (num_stripes /
 839                                btrfs_chunk_sub_stripes(leaf, chunk));
 840                break;
 841        default:
 842                /* Invalid chunk profile found */
 843                BUG_ON(1);
 844        }
 845        return stripe_len;
 846}
 847
 848int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 849{
 850        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 851        struct cache_extent *ce;
 852        struct map_lookup *map;
 853        int ret;
 854
 855        ce = search_cache_extent(&map_tree->cache_tree, logical);
 856        if (!ce) {
 857                fprintf(stderr, "No mapping for %llu-%llu\n",
 858                        (unsigned long long)logical,
 859                        (unsigned long long)logical+len);
 860                return 1;
 861        }
 862        if (ce->start > logical || ce->start + ce->size < logical) {
 863                fprintf(stderr, "Invalid mapping for %llu-%llu, got "
 864                        "%llu-%llu\n", (unsigned long long)logical,
 865                        (unsigned long long)logical+len,
 866                        (unsigned long long)ce->start,
 867                        (unsigned long long)ce->start + ce->size);
 868                return 1;
 869        }
 870        map = container_of(ce, struct map_lookup, ce);
 871
 872        if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
 873                         BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4))
 874                ret = map->num_stripes;
 875        else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
 876                ret = map->sub_stripes;
 877        else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
 878                ret = 2;
 879        else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
 880                ret = 3;
 881        else
 882                ret = 1;
 883        return ret;
 884}
 885
 886int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical,
 887                  u64 *size, u64 type)
 888{
 889        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 890        struct cache_extent *ce;
 891        struct map_lookup *map;
 892        u64 cur = *logical;
 893
 894        ce = search_cache_extent(&map_tree->cache_tree, cur);
 895
 896        while (ce) {
 897                /*
 898                 * only jump to next bg if our cur is not 0
 899                 * As the initial logical for btrfs_next_bg() is 0, and
 900                 * if we jump to next bg, we skipped a valid bg.
 901                 */
 902                if (cur) {
 903                        ce = next_cache_extent(ce);
 904                        if (!ce)
 905                                return -ENOENT;
 906                }
 907
 908                cur = ce->start;
 909                map = container_of(ce, struct map_lookup, ce);
 910                if (map->type & type) {
 911                        *logical = ce->start;
 912                        *size = ce->size;
 913                        return 0;
 914                }
 915                if (!cur)
 916                        ce = next_cache_extent(ce);
 917        }
 918
 919        return -ENOENT;
 920}
 921
 922static inline int parity_smaller(u64 a, u64 b)
 923{
 924        return a > b;
 925}
 926
 927/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
 928static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map)
 929{
 930        struct btrfs_bio_stripe s;
 931        int i;
 932        u64 l;
 933        int again = 1;
 934
 935        while (again) {
 936                again = 0;
 937                for (i = 0; i < bbio->num_stripes - 1; i++) {
 938                        if (parity_smaller(raid_map[i], raid_map[i+1])) {
 939                                s = bbio->stripes[i];
 940                                l = raid_map[i];
 941                                bbio->stripes[i] = bbio->stripes[i+1];
 942                                raid_map[i] = raid_map[i+1];
 943                                bbio->stripes[i+1] = s;
 944                                raid_map[i+1] = l;
 945                                again = 1;
 946                        }
 947                }
 948        }
 949}
 950
 951int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 952                      u64 logical, u64 *length, u64 *type,
 953                      struct btrfs_multi_bio **multi_ret, int mirror_num,
 954                      u64 **raid_map_ret)
 955{
 956        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 957        struct cache_extent *ce;
 958        struct map_lookup *map;
 959        u64 offset;
 960        u64 stripe_offset;
 961        u64 *raid_map = NULL;
 962        int stripe_nr;
 963        int stripes_allocated = 8;
 964        int stripes_required = 1;
 965        int stripe_index;
 966        int i;
 967        struct btrfs_multi_bio *multi = NULL;
 968
 969        if (multi_ret && rw == READ) {
 970                stripes_allocated = 1;
 971        }
 972again:
 973        ce = search_cache_extent(&map_tree->cache_tree, logical);
 974        if (!ce) {
 975                kfree(multi);
 976                *length = (u64)-1;
 977                return -ENOENT;
 978        }
 979        if (ce->start > logical) {
 980                kfree(multi);
 981                *length = ce->start - logical;
 982                return -ENOENT;
 983        }
 984
 985        if (multi_ret) {
 986                multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
 987                                GFP_NOFS);
 988                if (!multi)
 989                        return -ENOMEM;
 990        }
 991        map = container_of(ce, struct map_lookup, ce);
 992        offset = logical - ce->start;
 993
 994        if (rw == WRITE) {
 995                if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
 996                                 BTRFS_BLOCK_GROUP_RAID1C3 |
 997                                 BTRFS_BLOCK_GROUP_RAID1C4 |
 998                                 BTRFS_BLOCK_GROUP_DUP)) {
 999                        stripes_required = map->num_stripes;
1000                } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1001                        stripes_required = map->sub_stripes;
1002                }
1003        }
1004        if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
1005            && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
1006                    /* RAID[56] write or recovery. Return all stripes */
1007                    stripes_required = map->num_stripes;
1008
1009                    /* Only allocate the map if we've already got a large enough multi_ret */
1010                    if (stripes_allocated >= stripes_required) {
1011                            raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
1012                            if (!raid_map) {
1013                                    kfree(multi);
1014                                    return -ENOMEM;
1015                            }
1016                    }
1017        }
1018
1019        /* if our multi bio struct is too small, back off and try again */
1020        if (multi_ret && stripes_allocated < stripes_required) {
1021                stripes_allocated = stripes_required;
1022                kfree(multi);
1023                multi = NULL;
1024                goto again;
1025        }
1026        stripe_nr = offset;
1027        /*
1028         * stripe_nr counts the total number of stripes we have to stride
1029         * to get to this block
1030         */
1031        stripe_nr = stripe_nr / map->stripe_len;
1032
1033        stripe_offset = stripe_nr * (u64)map->stripe_len;
1034        BUG_ON(offset < stripe_offset);
1035
1036        /* stripe_offset is the offset of this block in its stripe*/
1037        stripe_offset = offset - stripe_offset;
1038
1039        if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
1040                         BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4 |
1041                         BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
1042                         BTRFS_BLOCK_GROUP_RAID10 |
1043                         BTRFS_BLOCK_GROUP_DUP)) {
1044                /* we limit the length of each bio to what fits in a stripe */
1045                *length = min_t(u64, ce->size - offset,
1046                              map->stripe_len - stripe_offset);
1047        } else {
1048                *length = ce->size - offset;
1049        }
1050
1051        if (!multi_ret)
1052                goto out;
1053
1054        multi->num_stripes = 1;
1055        stripe_index = 0;
1056        if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
1057                         BTRFS_BLOCK_GROUP_RAID1C3 |
1058                         BTRFS_BLOCK_GROUP_RAID1C4)) {
1059                if (rw == WRITE)
1060                        multi->num_stripes = map->num_stripes;
1061                else if (mirror_num)
1062                        stripe_index = mirror_num - 1;
1063                else
1064                        stripe_index = stripe_nr % map->num_stripes;
1065        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1066                int factor = map->num_stripes / map->sub_stripes;
1067
1068                stripe_index = stripe_nr % factor;
1069                stripe_index *= map->sub_stripes;
1070
1071                if (rw == WRITE)
1072                        multi->num_stripes = map->sub_stripes;
1073                else if (mirror_num)
1074                        stripe_index += mirror_num - 1;
1075
1076                stripe_nr = stripe_nr / factor;
1077        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
1078                if (rw == WRITE)
1079                        multi->num_stripes = map->num_stripes;
1080                else if (mirror_num)
1081                        stripe_index = mirror_num - 1;
1082        } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
1083                                BTRFS_BLOCK_GROUP_RAID6)) {
1084
1085                if (raid_map) {
1086                        int rot;
1087                        u64 tmp;
1088                        u64 raid56_full_stripe_start;
1089                        u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;
1090
1091                        /*
1092                         * align the start of our data stripe in the logical
1093                         * address space
1094                         */
1095                        raid56_full_stripe_start = offset / full_stripe_len;
1096                        raid56_full_stripe_start *= full_stripe_len;
1097
1098                        /* get the data stripe number */
1099                        stripe_nr = raid56_full_stripe_start / map->stripe_len;
1100                        stripe_nr = stripe_nr / nr_data_stripes(map);
1101
1102                        /* Work out the disk rotation on this stripe-set */
1103                        rot = stripe_nr % map->num_stripes;
1104
1105                        /* Fill in the logical address of each stripe */
1106                        tmp = (u64)stripe_nr * nr_data_stripes(map);
1107
1108                        for (i = 0; i < nr_data_stripes(map); i++)
1109                                raid_map[(i+rot) % map->num_stripes] =
1110                                        ce->start + (tmp + i) * map->stripe_len;
1111
1112                        raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
1113                        if (map->type & BTRFS_BLOCK_GROUP_RAID6)
1114                                raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
1115
1116                        *length = map->stripe_len;
1117                        stripe_index = 0;
1118                        stripe_offset = 0;
1119                        multi->num_stripes = map->num_stripes;
1120                } else {
1121                        stripe_index = stripe_nr % nr_data_stripes(map);
1122                        stripe_nr = stripe_nr / nr_data_stripes(map);
1123
1124                        /*
1125                         * Mirror #0 or #1 means the original data block.
1126                         * Mirror #2 is RAID5 parity block.
1127                         * Mirror #3 is RAID6 Q block.
1128                         */
1129                        if (mirror_num > 1)
1130                                stripe_index = nr_data_stripes(map) + mirror_num - 2;
1131
1132                        /* We distribute the parity blocks across stripes */
1133                        stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
1134                }
1135        } else {
1136                /*
1137                 * after this do_div call, stripe_nr is the number of stripes
1138                 * on this device we have to walk to find the data, and
1139                 * stripe_index is the number of our device in the stripe array
1140                 */
1141                stripe_index = stripe_nr % map->num_stripes;
1142                stripe_nr = stripe_nr / map->num_stripes;
1143        }
1144        BUG_ON(stripe_index >= map->num_stripes);
1145
1146        for (i = 0; i < multi->num_stripes; i++) {
1147                multi->stripes[i].physical =
1148                        map->stripes[stripe_index].physical + stripe_offset +
1149                        stripe_nr * map->stripe_len;
1150                multi->stripes[i].dev = map->stripes[stripe_index].dev;
1151                stripe_index++;
1152        }
1153        *multi_ret = multi;
1154
1155        if (type)
1156                *type = map->type;
1157
1158        if (raid_map) {
1159                sort_parity_stripes(multi, raid_map);
1160                *raid_map_ret = raid_map;
1161        }
1162out:
1163        return 0;
1164}
1165
1166int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
1167                    u64 logical, u64 *length,
1168                    struct btrfs_multi_bio **multi_ret, int mirror_num,
1169                    u64 **raid_map_ret)
1170{
1171        return __btrfs_map_block(fs_info, rw, logical, length, NULL,
1172                                 multi_ret, mirror_num, raid_map_ret);
1173}
1174