/* linux/fs/btrfs/qgroup.c */
   1/*
   2 * Copyright (C) 2011 STRATO.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/sched.h>
  20#include <linux/pagemap.h>
  21#include <linux/writeback.h>
  22#include <linux/blkdev.h>
  23#include <linux/rbtree.h>
  24#include <linux/slab.h>
  25#include <linux/workqueue.h>
  26#include <linux/btrfs.h>
  27
  28#include "ctree.h"
  29#include "transaction.h"
  30#include "disk-io.h"
  31#include "locking.h"
  32#include "ulist.h"
  33#include "backref.h"
  34#include "extent_io.h"
  35
  36/* TODO XXX FIXME
  37 *  - subvol delete -> delete when ref goes to 0? delete limits also?
  38 *  - reorganize keys
  39 *  - compressed
  40 *  - sync
  41 *  - copy also limits on subvol creation
  42 *  - limit
   43 *  - caches for ulists
  44 *  - performance benchmarks
  45 *  - check all ioctl parameters
  46 */
  47
  48/*
  49 * one struct for each qgroup, organized in fs_info->qgroup_tree.
  50 */
  51struct btrfs_qgroup {
  52        u64 qgroupid;
  53
  54        /*
  55         * state
  56         */
  57        u64 rfer;       /* referenced */
  58        u64 rfer_cmpr;  /* referenced compressed */
  59        u64 excl;       /* exclusive */
  60        u64 excl_cmpr;  /* exclusive compressed */
  61
  62        /*
  63         * limits
  64         */
  65        u64 lim_flags;  /* which limits are set */
  66        u64 max_rfer;
  67        u64 max_excl;
  68        u64 rsv_rfer;
  69        u64 rsv_excl;
  70
  71        /*
  72         * reservation tracking
  73         */
  74        u64 reserved;
  75
  76        /*
  77         * lists
  78         */
  79        struct list_head groups;  /* groups this group is member of */
  80        struct list_head members; /* groups that are members of this group */
  81        struct list_head dirty;   /* dirty groups */
  82        struct rb_node node;      /* tree of qgroups */
  83
  84        /*
  85         * temp variables for accounting operations
  86         */
  87        u64 tag;
  88        u64 refcnt;
  89};
  90
  91/*
  92 * glue structure to represent the relations between qgroups.
  93 */
  94struct btrfs_qgroup_list {
  95        struct list_head next_group;
  96        struct list_head next_member;
  97        struct btrfs_qgroup *group;
  98        struct btrfs_qgroup *member;
  99};
 100
 101static int
 102qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
 103                   int init_flags);
 104static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
 105
 106/* must be called with qgroup_ioctl_lock held */
 107static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
 108                                           u64 qgroupid)
 109{
 110        struct rb_node *n = fs_info->qgroup_tree.rb_node;
 111        struct btrfs_qgroup *qgroup;
 112
 113        while (n) {
 114                qgroup = rb_entry(n, struct btrfs_qgroup, node);
 115                if (qgroup->qgroupid < qgroupid)
 116                        n = n->rb_left;
 117                else if (qgroup->qgroupid > qgroupid)
 118                        n = n->rb_right;
 119                else
 120                        return qgroup;
 121        }
 122        return NULL;
 123}
 124
 125/* must be called with qgroup_lock held */
 126static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
 127                                          u64 qgroupid)
 128{
 129        struct rb_node **p = &fs_info->qgroup_tree.rb_node;
 130        struct rb_node *parent = NULL;
 131        struct btrfs_qgroup *qgroup;
 132
 133        while (*p) {
 134                parent = *p;
 135                qgroup = rb_entry(parent, struct btrfs_qgroup, node);
 136
 137                if (qgroup->qgroupid < qgroupid)
 138                        p = &(*p)->rb_left;
 139                else if (qgroup->qgroupid > qgroupid)
 140                        p = &(*p)->rb_right;
 141                else
 142                        return qgroup;
 143        }
 144
 145        qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
 146        if (!qgroup)
 147                return ERR_PTR(-ENOMEM);
 148
 149        qgroup->qgroupid = qgroupid;
 150        INIT_LIST_HEAD(&qgroup->groups);
 151        INIT_LIST_HEAD(&qgroup->members);
 152        INIT_LIST_HEAD(&qgroup->dirty);
 153
 154        rb_link_node(&qgroup->node, parent, p);
 155        rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
 156
 157        return qgroup;
 158}
 159
 160static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
 161{
 162        struct btrfs_qgroup_list *list;
 163
 164        list_del(&qgroup->dirty);
 165        while (!list_empty(&qgroup->groups)) {
 166                list = list_first_entry(&qgroup->groups,
 167                                        struct btrfs_qgroup_list, next_group);
 168                list_del(&list->next_group);
 169                list_del(&list->next_member);
 170                kfree(list);
 171        }
 172
 173        while (!list_empty(&qgroup->members)) {
 174                list = list_first_entry(&qgroup->members,
 175                                        struct btrfs_qgroup_list, next_member);
 176                list_del(&list->next_group);
 177                list_del(&list->next_member);
 178                kfree(list);
 179        }
 180        kfree(qgroup);
 181}
 182
 183/* must be called with qgroup_lock held */
 184static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 185{
 186        struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
 187
 188        if (!qgroup)
 189                return -ENOENT;
 190
 191        rb_erase(&qgroup->node, &fs_info->qgroup_tree);
 192        __del_qgroup_rb(qgroup);
 193        return 0;
 194}
 195
 196/* must be called with qgroup_lock held */
 197static int add_relation_rb(struct btrfs_fs_info *fs_info,
 198                           u64 memberid, u64 parentid)
 199{
 200        struct btrfs_qgroup *member;
 201        struct btrfs_qgroup *parent;
 202        struct btrfs_qgroup_list *list;
 203
 204        member = find_qgroup_rb(fs_info, memberid);
 205        parent = find_qgroup_rb(fs_info, parentid);
 206        if (!member || !parent)
 207                return -ENOENT;
 208
 209        list = kzalloc(sizeof(*list), GFP_ATOMIC);
 210        if (!list)
 211                return -ENOMEM;
 212
 213        list->group = parent;
 214        list->member = member;
 215        list_add_tail(&list->next_group, &member->groups);
 216        list_add_tail(&list->next_member, &parent->members);
 217
 218        return 0;
 219}
 220
 221/* must be called with qgroup_lock held */
 222static int del_relation_rb(struct btrfs_fs_info *fs_info,
 223                           u64 memberid, u64 parentid)
 224{
 225        struct btrfs_qgroup *member;
 226        struct btrfs_qgroup *parent;
 227        struct btrfs_qgroup_list *list;
 228
 229        member = find_qgroup_rb(fs_info, memberid);
 230        parent = find_qgroup_rb(fs_info, parentid);
 231        if (!member || !parent)
 232                return -ENOENT;
 233
 234        list_for_each_entry(list, &member->groups, next_group) {
 235                if (list->group == parent) {
 236                        list_del(&list->next_group);
 237                        list_del(&list->next_member);
 238                        kfree(list);
 239                        return 0;
 240                }
 241        }
 242        return -ENOENT;
 243}
 244
 245/*
 246 * The full config is read in one go, only called from open_ctree()
 247 * It doesn't use any locking, as at this point we're still single-threaded
 248 */
 249int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 250{
 251        struct btrfs_key key;
 252        struct btrfs_key found_key;
 253        struct btrfs_root *quota_root = fs_info->quota_root;
 254        struct btrfs_path *path = NULL;
 255        struct extent_buffer *l;
 256        int slot;
 257        int ret = 0;
 258        u64 flags = 0;
 259        u64 rescan_progress = 0;
 260
 261        if (!fs_info->quota_enabled)
 262                return 0;
 263
 264        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
 265        if (!fs_info->qgroup_ulist) {
 266                ret = -ENOMEM;
 267                goto out;
 268        }
 269
 270        path = btrfs_alloc_path();
 271        if (!path) {
 272                ret = -ENOMEM;
 273                goto out;
 274        }
 275
 276        /* default this to quota off, in case no status key is found */
 277        fs_info->qgroup_flags = 0;
 278
 279        /*
 280         * pass 1: read status, all qgroup infos and limits
 281         */
 282        key.objectid = 0;
 283        key.type = 0;
 284        key.offset = 0;
 285        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
 286        if (ret)
 287                goto out;
 288
 289        while (1) {
 290                struct btrfs_qgroup *qgroup;
 291
 292                slot = path->slots[0];
 293                l = path->nodes[0];
 294                btrfs_item_key_to_cpu(l, &found_key, slot);
 295
 296                if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
 297                        struct btrfs_qgroup_status_item *ptr;
 298
 299                        ptr = btrfs_item_ptr(l, slot,
 300                                             struct btrfs_qgroup_status_item);
 301
 302                        if (btrfs_qgroup_status_version(l, ptr) !=
 303                            BTRFS_QGROUP_STATUS_VERSION) {
 304                                btrfs_err(fs_info,
 305                                 "old qgroup version, quota disabled");
 306                                goto out;
 307                        }
 308                        if (btrfs_qgroup_status_generation(l, ptr) !=
 309                            fs_info->generation) {
 310                                flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 311                                btrfs_err(fs_info,
 312                                        "qgroup generation mismatch, "
 313                                        "marked as inconsistent");
 314                        }
 315                        fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
 316                                                                          ptr);
 317                        rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
 318                        goto next1;
 319                }
 320
 321                if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
 322                    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
 323                        goto next1;
 324
 325                qgroup = find_qgroup_rb(fs_info, found_key.offset);
 326                if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
 327                    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
 328                        btrfs_err(fs_info, "inconsitent qgroup config");
 329                        flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 330                }
 331                if (!qgroup) {
 332                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
 333                        if (IS_ERR(qgroup)) {
 334                                ret = PTR_ERR(qgroup);
 335                                goto out;
 336                        }
 337                }
 338                switch (found_key.type) {
 339                case BTRFS_QGROUP_INFO_KEY: {
 340                        struct btrfs_qgroup_info_item *ptr;
 341
 342                        ptr = btrfs_item_ptr(l, slot,
 343                                             struct btrfs_qgroup_info_item);
 344                        qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
 345                        qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
 346                        qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
 347                        qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
 348                        /* generation currently unused */
 349                        break;
 350                }
 351                case BTRFS_QGROUP_LIMIT_KEY: {
 352                        struct btrfs_qgroup_limit_item *ptr;
 353
 354                        ptr = btrfs_item_ptr(l, slot,
 355                                             struct btrfs_qgroup_limit_item);
 356                        qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
 357                        qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
 358                        qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
 359                        qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
 360                        qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
 361                        break;
 362                }
 363                }
 364next1:
 365                ret = btrfs_next_item(quota_root, path);
 366                if (ret < 0)
 367                        goto out;
 368                if (ret)
 369                        break;
 370        }
 371        btrfs_release_path(path);
 372
 373        /*
 374         * pass 2: read all qgroup relations
 375         */
 376        key.objectid = 0;
 377        key.type = BTRFS_QGROUP_RELATION_KEY;
 378        key.offset = 0;
 379        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
 380        if (ret)
 381                goto out;
 382        while (1) {
 383                slot = path->slots[0];
 384                l = path->nodes[0];
 385                btrfs_item_key_to_cpu(l, &found_key, slot);
 386
 387                if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
 388                        goto next2;
 389
 390                if (found_key.objectid > found_key.offset) {
 391                        /* parent <- member, not needed to build config */
 392                        /* FIXME should we omit the key completely? */
 393                        goto next2;
 394                }
 395
 396                ret = add_relation_rb(fs_info, found_key.objectid,
 397                                      found_key.offset);
 398                if (ret == -ENOENT) {
 399                        btrfs_warn(fs_info,
 400                                "orphan qgroup relation 0x%llx->0x%llx",
 401                                found_key.objectid, found_key.offset);
 402                        ret = 0;        /* ignore the error */
 403                }
 404                if (ret)
 405                        goto out;
 406next2:
 407                ret = btrfs_next_item(quota_root, path);
 408                if (ret < 0)
 409                        goto out;
 410                if (ret)
 411                        break;
 412        }
 413out:
 414        fs_info->qgroup_flags |= flags;
 415        if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
 416                fs_info->quota_enabled = 0;
 417                fs_info->pending_quota_state = 0;
 418        } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
 419                   ret >= 0) {
 420                ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
 421        }
 422        btrfs_free_path(path);
 423
 424        if (ret < 0) {
 425                ulist_free(fs_info->qgroup_ulist);
 426                fs_info->qgroup_ulist = NULL;
 427                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 428        }
 429
 430        return ret < 0 ? ret : 0;
 431}
 432
 433/*
 434 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
 435 * first two are in single-threaded paths.And for the third one, we have set
 436 * quota_root to be null with qgroup_lock held before, so it is safe to clean
 437 * up the in-memory structures without qgroup_lock held.
 438 */
 439void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
 440{
 441        struct rb_node *n;
 442        struct btrfs_qgroup *qgroup;
 443
 444        while ((n = rb_first(&fs_info->qgroup_tree))) {
 445                qgroup = rb_entry(n, struct btrfs_qgroup, node);
 446                rb_erase(n, &fs_info->qgroup_tree);
 447                __del_qgroup_rb(qgroup);
 448        }
 449        /*
 450         * we call btrfs_free_qgroup_config() when umounting
 451         * filesystem and disabling quota, so we set qgroup_ulit
 452         * to be null here to avoid double free.
 453         */
 454        ulist_free(fs_info->qgroup_ulist);
 455        fs_info->qgroup_ulist = NULL;
 456}
 457
 458static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
 459                                    struct btrfs_root *quota_root,
 460                                    u64 src, u64 dst)
 461{
 462        int ret;
 463        struct btrfs_path *path;
 464        struct btrfs_key key;
 465
 466        path = btrfs_alloc_path();
 467        if (!path)
 468                return -ENOMEM;
 469
 470        key.objectid = src;
 471        key.type = BTRFS_QGROUP_RELATION_KEY;
 472        key.offset = dst;
 473
 474        ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
 475
 476        btrfs_mark_buffer_dirty(path->nodes[0]);
 477
 478        btrfs_free_path(path);
 479        return ret;
 480}
 481
 482static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
 483                                    struct btrfs_root *quota_root,
 484                                    u64 src, u64 dst)
 485{
 486        int ret;
 487        struct btrfs_path *path;
 488        struct btrfs_key key;
 489
 490        path = btrfs_alloc_path();
 491        if (!path)
 492                return -ENOMEM;
 493
 494        key.objectid = src;
 495        key.type = BTRFS_QGROUP_RELATION_KEY;
 496        key.offset = dst;
 497
 498        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 499        if (ret < 0)
 500                goto out;
 501
 502        if (ret > 0) {
 503                ret = -ENOENT;
 504                goto out;
 505        }
 506
 507        ret = btrfs_del_item(trans, quota_root, path);
 508out:
 509        btrfs_free_path(path);
 510        return ret;
 511}
 512
 513static int add_qgroup_item(struct btrfs_trans_handle *trans,
 514                           struct btrfs_root *quota_root, u64 qgroupid)
 515{
 516        int ret;
 517        struct btrfs_path *path;
 518        struct btrfs_qgroup_info_item *qgroup_info;
 519        struct btrfs_qgroup_limit_item *qgroup_limit;
 520        struct extent_buffer *leaf;
 521        struct btrfs_key key;
 522
 523        path = btrfs_alloc_path();
 524        if (!path)
 525                return -ENOMEM;
 526
 527        key.objectid = 0;
 528        key.type = BTRFS_QGROUP_INFO_KEY;
 529        key.offset = qgroupid;
 530
 531        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 532                                      sizeof(*qgroup_info));
 533        if (ret)
 534                goto out;
 535
 536        leaf = path->nodes[0];
 537        qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
 538                                 struct btrfs_qgroup_info_item);
 539        btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
 540        btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
 541        btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
 542        btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
 543        btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
 544
 545        btrfs_mark_buffer_dirty(leaf);
 546
 547        btrfs_release_path(path);
 548
 549        key.type = BTRFS_QGROUP_LIMIT_KEY;
 550        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 551                                      sizeof(*qgroup_limit));
 552        if (ret)
 553                goto out;
 554
 555        leaf = path->nodes[0];
 556        qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
 557                                  struct btrfs_qgroup_limit_item);
 558        btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
 559        btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
 560        btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
 561        btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
 562        btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
 563
 564        btrfs_mark_buffer_dirty(leaf);
 565
 566        ret = 0;
 567out:
 568        btrfs_free_path(path);
 569        return ret;
 570}
 571
 572static int del_qgroup_item(struct btrfs_trans_handle *trans,
 573                           struct btrfs_root *quota_root, u64 qgroupid)
 574{
 575        int ret;
 576        struct btrfs_path *path;
 577        struct btrfs_key key;
 578
 579        path = btrfs_alloc_path();
 580        if (!path)
 581                return -ENOMEM;
 582
 583        key.objectid = 0;
 584        key.type = BTRFS_QGROUP_INFO_KEY;
 585        key.offset = qgroupid;
 586        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 587        if (ret < 0)
 588                goto out;
 589
 590        if (ret > 0) {
 591                ret = -ENOENT;
 592                goto out;
 593        }
 594
 595        ret = btrfs_del_item(trans, quota_root, path);
 596        if (ret)
 597                goto out;
 598
 599        btrfs_release_path(path);
 600
 601        key.type = BTRFS_QGROUP_LIMIT_KEY;
 602        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 603        if (ret < 0)
 604                goto out;
 605
 606        if (ret > 0) {
 607                ret = -ENOENT;
 608                goto out;
 609        }
 610
 611        ret = btrfs_del_item(trans, quota_root, path);
 612
 613out:
 614        btrfs_free_path(path);
 615        return ret;
 616}
 617
 618static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
 619                                    struct btrfs_root *root, u64 qgroupid,
 620                                    u64 flags, u64 max_rfer, u64 max_excl,
 621                                    u64 rsv_rfer, u64 rsv_excl)
 622{
 623        struct btrfs_path *path;
 624        struct btrfs_key key;
 625        struct extent_buffer *l;
 626        struct btrfs_qgroup_limit_item *qgroup_limit;
 627        int ret;
 628        int slot;
 629
 630        key.objectid = 0;
 631        key.type = BTRFS_QGROUP_LIMIT_KEY;
 632        key.offset = qgroupid;
 633
 634        path = btrfs_alloc_path();
 635        if (!path)
 636                return -ENOMEM;
 637
 638        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 639        if (ret > 0)
 640                ret = -ENOENT;
 641
 642        if (ret)
 643                goto out;
 644
 645        l = path->nodes[0];
 646        slot = path->slots[0];
 647        qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
 648        btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
 649        btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
 650        btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
 651        btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
 652        btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
 653
 654        btrfs_mark_buffer_dirty(l);
 655
 656out:
 657        btrfs_free_path(path);
 658        return ret;
 659}
 660
 661static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
 662                                   struct btrfs_root *root,
 663                                   struct btrfs_qgroup *qgroup)
 664{
 665        struct btrfs_path *path;
 666        struct btrfs_key key;
 667        struct extent_buffer *l;
 668        struct btrfs_qgroup_info_item *qgroup_info;
 669        int ret;
 670        int slot;
 671
 672        key.objectid = 0;
 673        key.type = BTRFS_QGROUP_INFO_KEY;
 674        key.offset = qgroup->qgroupid;
 675
 676        path = btrfs_alloc_path();
 677        if (!path)
 678                return -ENOMEM;
 679
 680        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 681        if (ret > 0)
 682                ret = -ENOENT;
 683
 684        if (ret)
 685                goto out;
 686
 687        l = path->nodes[0];
 688        slot = path->slots[0];
 689        qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
 690        btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
 691        btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
 692        btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
 693        btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
 694        btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
 695
 696        btrfs_mark_buffer_dirty(l);
 697
 698out:
 699        btrfs_free_path(path);
 700        return ret;
 701}
 702
 703static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
 704                                     struct btrfs_fs_info *fs_info,
 705                                    struct btrfs_root *root)
 706{
 707        struct btrfs_path *path;
 708        struct btrfs_key key;
 709        struct extent_buffer *l;
 710        struct btrfs_qgroup_status_item *ptr;
 711        int ret;
 712        int slot;
 713
 714        key.objectid = 0;
 715        key.type = BTRFS_QGROUP_STATUS_KEY;
 716        key.offset = 0;
 717
 718        path = btrfs_alloc_path();
 719        if (!path)
 720                return -ENOMEM;
 721
 722        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 723        if (ret > 0)
 724                ret = -ENOENT;
 725
 726        if (ret)
 727                goto out;
 728
 729        l = path->nodes[0];
 730        slot = path->slots[0];
 731        ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
 732        btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
 733        btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
 734        btrfs_set_qgroup_status_rescan(l, ptr,
 735                                fs_info->qgroup_rescan_progress.objectid);
 736
 737        btrfs_mark_buffer_dirty(l);
 738
 739out:
 740        btrfs_free_path(path);
 741        return ret;
 742}
 743
 744/*
 745 * called with qgroup_lock held
 746 */
 747static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
 748                                  struct btrfs_root *root)
 749{
 750        struct btrfs_path *path;
 751        struct btrfs_key key;
 752        struct extent_buffer *leaf = NULL;
 753        int ret;
 754        int nr = 0;
 755
 756        path = btrfs_alloc_path();
 757        if (!path)
 758                return -ENOMEM;
 759
 760        path->leave_spinning = 1;
 761
 762        key.objectid = 0;
 763        key.offset = 0;
 764        key.type = 0;
 765
 766        while (1) {
 767                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 768                if (ret < 0)
 769                        goto out;
 770                leaf = path->nodes[0];
 771                nr = btrfs_header_nritems(leaf);
 772                if (!nr)
 773                        break;
 774                /*
 775                 * delete the leaf one by one
 776                 * since the whole tree is going
 777                 * to be deleted.
 778                 */
 779                path->slots[0] = 0;
 780                ret = btrfs_del_items(trans, root, path, 0, nr);
 781                if (ret)
 782                        goto out;
 783
 784                btrfs_release_path(path);
 785        }
 786        ret = 0;
 787out:
 788        root->fs_info->pending_quota_state = 0;
 789        btrfs_free_path(path);
 790        return ret;
 791}
 792
 793int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 794                       struct btrfs_fs_info *fs_info)
 795{
 796        struct btrfs_root *quota_root;
 797        struct btrfs_root *tree_root = fs_info->tree_root;
 798        struct btrfs_path *path = NULL;
 799        struct btrfs_qgroup_status_item *ptr;
 800        struct extent_buffer *leaf;
 801        struct btrfs_key key;
 802        struct btrfs_key found_key;
 803        struct btrfs_qgroup *qgroup = NULL;
 804        int ret = 0;
 805        int slot;
 806
 807        mutex_lock(&fs_info->qgroup_ioctl_lock);
 808        if (fs_info->quota_root) {
 809                fs_info->pending_quota_state = 1;
 810                goto out;
 811        }
 812
 813        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
 814        if (!fs_info->qgroup_ulist) {
 815                ret = -ENOMEM;
 816                goto out;
 817        }
 818
 819        /*
 820         * initially create the quota tree
 821         */
 822        quota_root = btrfs_create_tree(trans, fs_info,
 823                                       BTRFS_QUOTA_TREE_OBJECTID);
 824        if (IS_ERR(quota_root)) {
 825                ret =  PTR_ERR(quota_root);
 826                goto out;
 827        }
 828
 829        path = btrfs_alloc_path();
 830        if (!path) {
 831                ret = -ENOMEM;
 832                goto out_free_root;
 833        }
 834
 835        key.objectid = 0;
 836        key.type = BTRFS_QGROUP_STATUS_KEY;
 837        key.offset = 0;
 838
 839        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 840                                      sizeof(*ptr));
 841        if (ret)
 842                goto out_free_path;
 843
 844        leaf = path->nodes[0];
 845        ptr = btrfs_item_ptr(leaf, path->slots[0],
 846                                 struct btrfs_qgroup_status_item);
 847        btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
 848        btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
 849        fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
 850                                BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 851        btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
 852        btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
 853
 854        btrfs_mark_buffer_dirty(leaf);
 855
 856        key.objectid = 0;
 857        key.type = BTRFS_ROOT_REF_KEY;
 858        key.offset = 0;
 859
 860        btrfs_release_path(path);
 861        ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
 862        if (ret > 0)
 863                goto out_add_root;
 864        if (ret < 0)
 865                goto out_free_path;
 866
 867
 868        while (1) {
 869                slot = path->slots[0];
 870                leaf = path->nodes[0];
 871                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 872
 873                if (found_key.type == BTRFS_ROOT_REF_KEY) {
 874                        ret = add_qgroup_item(trans, quota_root,
 875                                              found_key.offset);
 876                        if (ret)
 877                                goto out_free_path;
 878
 879                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
 880                        if (IS_ERR(qgroup)) {
 881                                ret = PTR_ERR(qgroup);
 882                                goto out_free_path;
 883                        }
 884                }
 885                ret = btrfs_next_item(tree_root, path);
 886                if (ret < 0)
 887                        goto out_free_path;
 888                if (ret)
 889                        break;
 890        }
 891
 892out_add_root:
 893        btrfs_release_path(path);
 894        ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
 895        if (ret)
 896                goto out_free_path;
 897
 898        qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
 899        if (IS_ERR(qgroup)) {
 900                ret = PTR_ERR(qgroup);
 901                goto out_free_path;
 902        }
 903        spin_lock(&fs_info->qgroup_lock);
 904        fs_info->quota_root = quota_root;
 905        fs_info->pending_quota_state = 1;
 906        spin_unlock(&fs_info->qgroup_lock);
 907out_free_path:
 908        btrfs_free_path(path);
 909out_free_root:
 910        if (ret) {
 911                free_extent_buffer(quota_root->node);
 912                free_extent_buffer(quota_root->commit_root);
 913                kfree(quota_root);
 914        }
 915out:
 916        if (ret) {
 917                ulist_free(fs_info->qgroup_ulist);
 918                fs_info->qgroup_ulist = NULL;
 919        }
 920        mutex_unlock(&fs_info->qgroup_ioctl_lock);
 921        return ret;
 922}
 923
/*
 * Disable the qgroup feature: detach and tear down the in-memory qgroup
 * state, delete all items from the quota tree and finally remove the
 * quota root itself.
 *
 * Returns 0 on success (or if quota was never enabled) and a negative
 * errno if cleaning the quota tree or deleting the quota root fails.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	/* nothing to do if quota was never enabled */
	if (!fs_info->quota_root)
		goto out;
	/*
	 * Clear the enabled state and detach the quota root under
	 * qgroup_lock, so concurrent accounting sees quota as disabled
	 * from here on.
	 */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	spin_unlock(&fs_info->qgroup_lock);

	/* free the in-memory qgroup configuration (rb-tree and relations) */
	btrfs_free_qgroup_config(fs_info);

	/* delete every item from the quota tree */
	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	/* remove the quota root's entry from the tree of tree roots */
	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* release the (now empty) root node's tree block */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
 965
 966static void qgroup_dirty(struct btrfs_fs_info *fs_info,
 967                         struct btrfs_qgroup *qgroup)
 968{
 969        if (list_empty(&qgroup->dirty))
 970                list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 971}
 972
/*
 * Make qgroup 'src' a member of qgroup 'dst'.  The relation is stored
 * twice in the quota tree (one item per direction) and mirrored in the
 * in-memory configuration.
 *
 * Returns -EINVAL if quota is not enabled or either qgroup does not
 * exist, -EEXIST if the relation is already present, otherwise the
 * result of inserting the items / updating the rb-tree.
 */
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such qgroup relation exist firstly */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
	if (ret) {
		/* roll back the first item so the on-disk state stays symmetric */
		del_qgroup_relation_item(trans, quota_root, src, dst);
		goto out;
	}

	/* finally mirror the new relation into the in-memory configuration */
	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(quota_root->fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1020
1021int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1022                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1023{
1024        struct btrfs_root *quota_root;
1025        struct btrfs_qgroup *parent;
1026        struct btrfs_qgroup *member;
1027        struct btrfs_qgroup_list *list;
1028        int ret = 0;
1029        int err;
1030
1031        mutex_lock(&fs_info->qgroup_ioctl_lock);
1032        quota_root = fs_info->quota_root;
1033        if (!quota_root) {
1034                ret = -EINVAL;
1035                goto out;
1036        }
1037
1038        member = find_qgroup_rb(fs_info, src);
1039        parent = find_qgroup_rb(fs_info, dst);
1040        if (!member || !parent) {
1041                ret = -EINVAL;
1042                goto out;
1043        }
1044
1045        /* check if such qgroup relation exist firstly */
1046        list_for_each_entry(list, &member->groups, next_group) {
1047                if (list->group == parent)
1048                        goto exist;
1049        }
1050        ret = -ENOENT;
1051        goto out;
1052exist:
1053        ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1054        err = del_qgroup_relation_item(trans, quota_root, dst, src);
1055        if (err && !ret)
1056                ret = err;
1057
1058        spin_lock(&fs_info->qgroup_lock);
1059        del_relation_rb(fs_info, src, dst);
1060        spin_unlock(&fs_info->qgroup_lock);
1061out:
1062        mutex_unlock(&fs_info->qgroup_ioctl_lock);
1063        return ret;
1064}
1065
/*
 * Create a new qgroup with the given id.  The @name parameter is not
 * used by this function.
 *
 * Returns -EINVAL if quota is not enabled and -EEXIST if a qgroup with
 * this id already exists.
 */
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (qgroup) {
		ret = -EEXIST;
		goto out;
	}

	/* create the on-disk item first ... */
	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		goto out;

	/* ... then the in-memory counterpart */
	spin_lock(&fs_info->qgroup_lock);
	qgroup = add_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

	if (IS_ERR(qgroup))
		ret = PTR_ERR(qgroup);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1099
1100int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1101                        struct btrfs_fs_info *fs_info, u64 qgroupid)
1102{
1103        struct btrfs_root *quota_root;
1104        struct btrfs_qgroup *qgroup;
1105        int ret = 0;
1106
1107        mutex_lock(&fs_info->qgroup_ioctl_lock);
1108        quota_root = fs_info->quota_root;
1109        if (!quota_root) {
1110                ret = -EINVAL;
1111                goto out;
1112        }
1113
1114        qgroup = find_qgroup_rb(fs_info, qgroupid);
1115        if (!qgroup) {
1116                ret = -ENOENT;
1117                goto out;
1118        } else {
1119                /* check if there are no relations to this qgroup */
1120                if (!list_empty(&qgroup->groups) ||
1121                    !list_empty(&qgroup->members)) {
1122                        ret = -EBUSY;
1123                        goto out;
1124                }
1125        }
1126        ret = del_qgroup_item(trans, quota_root, qgroupid);
1127
1128        spin_lock(&fs_info->qgroup_lock);
1129        del_qgroup_rb(quota_root->fs_info, qgroupid);
1130        spin_unlock(&fs_info->qgroup_lock);
1131out:
1132        mutex_unlock(&fs_info->qgroup_ioctl_lock);
1133        return ret;
1134}
1135
/*
 * Apply new limits to a qgroup.  The limit item on disk is updated
 * first; the in-memory qgroup is updated even when the on-disk update
 * failed, in which case the qgroup state is flagged inconsistent and
 * the error is returned.
 *
 * Returns -EINVAL if quota is not enabled and -ENOENT if the qgroup
 * does not exist.
 */
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}
	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, "unable to update quota limit for %llu",
		       qgroupid);
	}

	/* mirror the new limits into the in-memory qgroup */
	spin_lock(&fs_info->qgroup_lock);
	qgroup->lim_flags = limit->flags;
	qgroup->max_rfer = limit->max_rfer;
	qgroup->max_excl = limit->max_excl;
	qgroup->rsv_rfer = limit->rsv_rfer;
	qgroup->rsv_excl = limit->rsv_excl;
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1177
1178/*
1179 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1180 * the modification into a list that's later used by btrfs_end_transaction to
1181 * pass the recorded modifications on to btrfs_qgroup_account_ref.
1182 */
1183int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1184                            struct btrfs_delayed_ref_node *node,
1185                            struct btrfs_delayed_extent_op *extent_op)
1186{
1187        struct qgroup_update *u;
1188
1189        BUG_ON(!trans->delayed_ref_elem.seq);
1190        u = kmalloc(sizeof(*u), GFP_NOFS);
1191        if (!u)
1192                return -ENOMEM;
1193
1194        u->node = node;
1195        u->extent_op = extent_op;
1196        list_add_tail(&u->list, &trans->qgroup_ref_list);
1197
1198        return 0;
1199}
1200
/*
 * Accounting step 1: for every root that referenced the extent before
 * the operation ('roots'), visit its qgroup and all ancestor qgroups
 * once and bump their refcnt, so that steps 2 and 3 can tell how many
 * of the old roots reach each qgroup.
 *
 * 'tmp' is caller-provided scratch space, 'seq' the base sequence
 * number of this accounting round (refcnt values below 'seq' are
 * stale leftovers from earlier rounds).  Called with
 * fs_info->qgroup_lock held, hence the GFP_ATOMIC allocations.
 */
static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
						/* XXX id not needed */
		ret = ulist_add(tmp, qg->qgroupid,
				(u64)(uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/*
			 * refcnt < seq means this qgroup has not been
			 * touched in this round yet: restart the count
			 * at seq + 1.  Otherwise another old root also
			 * reaches it; count it.
			 */
			if (qg->refcnt < seq)
				qg->refcnt = seq + 1;
			else
				++qg->refcnt;

			/* queue all parent groups for the same treatment */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(u64)(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1246
/*
 * Accounting step 2: walk from 'qgroup' (the group of the root the
 * operation applies to) up through all its ancestors.  Groups that
 * were not visited in step 1 (refcnt < seq) are reachable only via
 * this root: adjust their referenced counters by sgn * num_bytes, and
 * the exclusive counters as well when no other root references the
 * extent (roots->nnodes == 0).  Every visited group is tagged with
 * 'seq' so that step 3 can skip it.
 *
 * Called with fs_info->qgroup_lock held.
 */
static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes,
				    struct btrfs_qgroup *qgroup)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_list *glist;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * num_bytes;
			qg->rfer_cmpr += sgn * num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * num_bytes;
				qg->excl_cmpr += sgn * num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		/* each group must be visited at most once per round */
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		/* continue the walk with all parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}

	return 0;
}
1289
/*
 * Accounting step 3: walk the old roots' qgroups and their ancestors
 * again.  Groups that were tagged with 'seq' in step 2 are skipped.
 * For a group whose refcnt was bumped once per old root
 * (refcnt - seq == roots->nnodes), i.e. one reached by every root in
 * 'roots', the exclusive counters are adjusted by -sgn * num_bytes.
 *
 * Called with fs_info->qgroup_lock held.
 */
static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;

		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/* already handled in step 2 */
			if (qg->tag == seq)
				continue;

			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * num_bytes;
				qg->excl_cmpr -= sgn * num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			/* continue the walk with all parent groups */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1338
1339/*
1340 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1341 * from the fs. First, all roots referencing the extent are searched, and
1342 * then the space is accounted accordingly to the different roots. The
1343 * accounting algorithm works in 3 steps documented inline.
1344 */
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info,
			     struct btrfs_delayed_ref_node *node,
			     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_root *quota_root;
	u64 ref_root;
	struct btrfs_qgroup *qgroup;
	struct ulist *roots = NULL;
	u64 seq;
	int ret = 0;
	int sgn;

	if (!fs_info->quota_enabled)
		return 0;

	BUG_ON(!fs_info->quota_root);

	/* determine the root this delayed ref belongs to */
	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		struct btrfs_delayed_tree_ref *ref;
		ref = btrfs_delayed_node_to_tree_ref(node);
		ref_root = ref->root;
	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_delayed_data_ref *ref;
		ref = btrfs_delayed_node_to_data_ref(node);
		ref_root = ref->root;
	} else {
		BUG();
	}

	if (!is_fstree(ref_root)) {
		/*
		 * non-fs-trees are not being accounted
		 */
		return 0;
	}

	/* sign of the accounting delta and the sequence number to search at */
	switch (node->action) {
	case BTRFS_ADD_DELAYED_REF:
	case BTRFS_ADD_DELAYED_EXTENT:
		sgn = 1;
		seq = btrfs_tree_mod_seq_prev(node->seq);
		break;
	case BTRFS_DROP_DELAYED_REF:
		sgn = -1;
		seq = node->seq;
		break;
	case BTRFS_UPDATE_DELAYED_HEAD:
		return 0;
	default:
		BUG();
	}

	/*
	 * While a rescan is running, skip extents at or beyond the rescan
	 * cursor; only extents the rescan has already passed are accounted
	 * here.
	 */
	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			return 0;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * the delayed ref sequence number we pass depends on the direction of
	 * the operation. for add operations, we pass
	 * tree_mod_log_prev_seq(node->seq) to skip
	 * the delayed ref's current sequence number, because we need the state
	 * of the tree before the add operation. for delete operations, we pass
	 * (node->seq) to include the delayed ref's current sequence number,
	 * because we need the state of the tree after the delete operation.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
	if (ret < 0)
		return ret;

	spin_lock(&fs_info->qgroup_lock);

	/* quota may have been disabled since the check above */
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto unlock;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto unlock;

	/*
	 * step 1: for each old ref, visit all nodes once and inc refcnt
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	seq = fs_info->qgroup_seq;
	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
				       seq);
	if (ret)
		goto unlock;

	/*
	 * step 2: walk from the new root
	 */
	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes, qgroup);
	if (ret)
		goto unlock;

	/*
	 * step 3: walk again from old refs
	 */
	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes);
	if (ret)
		goto unlock;

unlock:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(roots);

	return ret;
}
1466
1467/*
1468 * called from commit_transaction. Writes all changed qgroups to disk.
1469 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;
	int start_rescan_worker = 0;

	if (!quota_root)
		goto out;

	/*
	 * Quota is being switched on in this commit (pending state set
	 * while not yet enabled): a rescan must be kicked off below.
	 */
	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
		start_rescan_worker = 1;

	fs_info->quota_enabled = fs_info->pending_quota_state;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		/* drop the spinlock while writing the info item to disk */
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	if (!ret && start_rescan_worker) {
		ret = qgroup_rescan_init(fs_info, 0, 1);
		if (!ret) {
			qgroup_rescan_zero_tracking(fs_info);
			btrfs_queue_work(fs_info->qgroup_rescan_workers,
					 &fs_info->qgroup_rescan_work);
		}
		/* a failure to start the rescan worker is not fatal here */
		ret = 0;
	}

out:

	return ret;
}
1522
1523/*
 1524 * copy the accounting information between qgroups. This is necessary when a
1525 * snapshot or a subvolume is created
1526 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;		/* cursor into the qgroup id array after 'inherit' */
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;	/* size of one tree block of the source root */
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_enabled)
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Validate up front that every qgroup id passed in the inherit
	 * structure actually exists.  The ids are laid out directly
	 * behind the structure: num_qgroups ids, then num_ref_copies
	 * and num_excl_copies (src, dst) pairs.
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
			if (!srcgroup) {
				ret = -EINVAL;
				goto out;
			}
			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		ret = update_qgroup_limit_item(trans, quota_root, objectid,
					       inherit->lim.flags,
					       inherit->lim.max_rfer,
					       inherit->lim.max_excl,
					       inherit->lim.rsv_rfer,
					       inherit->lim.rsv_excl);
		if (ret)
			goto out;
	}

	/*
	 * For snapshots, determine the tree block size of the source
	 * root; it is used below to adjust the copied counters.
	 */
	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;
		int srcroot_level;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		rcu_read_lock();
		srcroot_level = btrfs_header_level(srcroot->node);
		level_size = btrfs_level_size(srcroot, srcroot_level);
		rcu_read_unlock();
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i) {
			/* one relation item per direction, as elsewhere */
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret)
				goto out;
			++i_qgroups;
		}
	}


	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		/*
		 * NOTE(review): if the source qgroup is missing here we
		 * return 0 without copying any counters — confirm this
		 * silent-success path is intended.
		 */
		if (!srcgroup)
			goto unlock;
		/*
		 * The snapshot shares everything with the source except
		 * its own root node, hence the level_size adjustments.
		 */
		dstgroup->rfer = srcgroup->rfer - level_size;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;
		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	/* mirror the inherited relations into the in-memory configuration */
	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		ret = add_relation_rb(quota_root->fs_info, objectid,
				      *i_qgroups);
		if (ret)
			goto unlock;
		++i_qgroups;
	}

	/* copy referenced counters for the requested (src, dst) pairs */
	for (i = 0; i <  inherit->num_ref_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
		i_qgroups += 2;
	}
	/* copy exclusive counters for the requested (src, dst) pairs */
	for (i = 0; i <  inherit->num_excl_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
		i_qgroups += 2;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1691
1692/*
1693 * reserve some space for a qgroup and all its parents. The reservation takes
1694 * place with start_transaction or dealloc_reserve, similar to ENOSPC
1695 * accounting. If not enough space is available, EDQUOT is returned.
1696 * We assume that the requested space is new for all qgroups.
1697 */
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	/* only fs trees are subject to quota */
	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		/* would the new reservation exceed the referenced limit? */
		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
		    qg->reserved + (s64)qg->rfer + num_bytes >
		    qg->max_rfer) {
			ret = -EDQUOT;
			goto out;
		}

		/* would the new reservation exceed the exclusive limit? */
		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
		    qg->reserved + (s64)qg->excl + num_bytes >
		    qg->max_excl) {
			ret = -EDQUOT;
			goto out;
		}

		/* extend the check to all parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
1778
1779void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1780{
1781        struct btrfs_root *quota_root;
1782        struct btrfs_qgroup *qgroup;
1783        struct btrfs_fs_info *fs_info = root->fs_info;
1784        struct ulist_node *unode;
1785        struct ulist_iterator uiter;
1786        u64 ref_root = root->root_key.objectid;
1787        int ret = 0;
1788
1789        if (!is_fstree(ref_root))
1790                return;
1791
1792        if (num_bytes == 0)
1793                return;
1794
1795        spin_lock(&fs_info->qgroup_lock);
1796
1797        quota_root = fs_info->quota_root;
1798        if (!quota_root)
1799                goto out;
1800
1801        qgroup = find_qgroup_rb(fs_info, ref_root);
1802        if (!qgroup)
1803                goto out;
1804
1805        ulist_reinit(fs_info->qgroup_ulist);
1806        ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
1807                        (uintptr_t)qgroup, GFP_ATOMIC);
1808        if (ret < 0)
1809                goto out;
1810        ULIST_ITER_INIT(&uiter);
1811        while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1812                struct btrfs_qgroup *qg;
1813                struct btrfs_qgroup_list *glist;
1814
1815                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1816
1817                qg->reserved -= num_bytes;
1818
1819                list_for_each_entry(glist, &qg->groups, next_group) {
1820                        ret = ulist_add(fs_info->qgroup_ulist,
1821                                        glist->group->qgroupid,
1822                                        (uintptr_t)glist->group, GFP_ATOMIC);
1823                        if (ret < 0)
1824                                goto out;
1825                }
1826        }
1827
1828out:
1829        spin_unlock(&fs_info->qgroup_lock);
1830}
1831
1832void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1833{
1834        if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1835                return;
1836        btrfs_err(trans->root->fs_info,
1837                "qgroups not uptodate in trans handle %p:  list is%s empty, "
1838                "seq is %#x.%x",
1839                trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1840                (u32)(trans->delayed_ref_elem.seq >> 32),
1841                (u32)trans->delayed_ref_elem.seq);
1842        BUG();
1843}
1844
1845/*
1846 * returns < 0 on error, 0 when more leafs are to be scanned.
1847 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1848 */
1849static int
1850qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1851                   struct btrfs_trans_handle *trans, struct ulist *tmp,
1852                   struct extent_buffer *scratch_leaf)
1853{
1854        struct btrfs_key found;
1855        struct ulist *roots = NULL;
1856        struct ulist_node *unode;
1857        struct ulist_iterator uiter;
1858        struct seq_list tree_mod_seq_elem = {};
1859        u64 seq;
1860        int slot;
1861        int ret;
1862
1863        path->leave_spinning = 1;
1864        mutex_lock(&fs_info->qgroup_rescan_lock);
1865        ret = btrfs_search_slot_for_read(fs_info->extent_root,
1866                                         &fs_info->qgroup_rescan_progress,
1867                                         path, 1, 0);
1868
1869        pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1870                 fs_info->qgroup_rescan_progress.objectid,
1871                 fs_info->qgroup_rescan_progress.type,
1872                 fs_info->qgroup_rescan_progress.offset, ret);
1873
1874        if (ret) {
1875                /*
1876                 * The rescan is about to end, we will not be scanning any
1877                 * further blocks. We cannot unset the RESCAN flag here, because
1878                 * we want to commit the transaction if everything went well.
1879                 * To make the live accounting work in this phase, we set our
1880                 * scan progress pointer such that every real extent objectid
1881                 * will be smaller.
1882                 */
1883                fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1884                btrfs_release_path(path);
1885                mutex_unlock(&fs_info->qgroup_rescan_lock);
1886                return ret;
1887        }
1888
1889        btrfs_item_key_to_cpu(path->nodes[0], &found,
1890                              btrfs_header_nritems(path->nodes[0]) - 1);
1891        fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1892
1893        btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1894        memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1895        slot = path->slots[0];
1896        btrfs_release_path(path);
1897        mutex_unlock(&fs_info->qgroup_rescan_lock);
1898
1899        for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1900                u64 num_bytes;
1901
1902                btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1903                if (found.type != BTRFS_EXTENT_ITEM_KEY &&
1904                    found.type != BTRFS_METADATA_ITEM_KEY)
1905                        continue;
1906                if (found.type == BTRFS_METADATA_ITEM_KEY)
1907                        num_bytes = fs_info->extent_root->leafsize;
1908                else
1909                        num_bytes = found.offset;
1910
1911                ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1912                                           tree_mod_seq_elem.seq, &roots);
1913                if (ret < 0)
1914                        goto out;
1915                spin_lock(&fs_info->qgroup_lock);
1916                seq = fs_info->qgroup_seq;
1917                fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1918
1919                ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1920                if (ret) {
1921                        spin_unlock(&fs_info->qgroup_lock);
1922                        ulist_free(roots);
1923                        goto out;
1924                }
1925
1926                /*
1927                 * step2 of btrfs_qgroup_account_ref works from a single root,
1928                 * we're doing all at once here.
1929                 */
1930                ulist_reinit(tmp);
1931                ULIST_ITER_INIT(&uiter);
1932                while ((unode = ulist_next(roots, &uiter))) {
1933                        struct btrfs_qgroup *qg;
1934
1935                        qg = find_qgroup_rb(fs_info, unode->val);
1936                        if (!qg)
1937                                continue;
1938
1939                        ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1940                                        GFP_ATOMIC);
1941                        if (ret < 0) {
1942                                spin_unlock(&fs_info->qgroup_lock);
1943                                ulist_free(roots);
1944                                goto out;
1945                        }
1946                }
1947
1948                /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1949                ULIST_ITER_INIT(&uiter);
1950                while ((unode = ulist_next(tmp, &uiter))) {
1951                        struct btrfs_qgroup *qg;
1952                        struct btrfs_qgroup_list *glist;
1953
1954                        qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1955                        qg->rfer += num_bytes;
1956                        qg->rfer_cmpr += num_bytes;
1957                        WARN_ON(qg->tag >= seq);
1958                        if (qg->refcnt - seq == roots->nnodes) {
1959                                qg->excl += num_bytes;
1960                                qg->excl_cmpr += num_bytes;
1961                        }
1962                        qgroup_dirty(fs_info, qg);
1963
1964                        list_for_each_entry(glist, &qg->groups, next_group) {
1965                                ret = ulist_add(tmp, glist->group->qgroupid,
1966                                                (uintptr_t)glist->group,
1967                                                GFP_ATOMIC);
1968                                if (ret < 0) {
1969                                        spin_unlock(&fs_info->qgroup_lock);
1970                                        ulist_free(roots);
1971                                        goto out;
1972                                }
1973                        }
1974                }
1975
1976                spin_unlock(&fs_info->qgroup_lock);
1977                ulist_free(roots);
1978                ret = 0;
1979        }
1980
1981out:
1982        btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1983
1984        return ret;
1985}
1986
/*
 * Work item that drives a full qgroup rescan: repeatedly scans one extent
 * tree leaf per transaction via qgroup_rescan_leaf() until it reports
 * completion (> 0) or an error (< 0), then updates the qgroup status flags
 * and wakes anyone waiting in btrfs_qgroup_wait_for_completion().
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
        struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
                                                     qgroup_rescan_work);
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans = NULL;
        struct ulist *tmp = NULL;
        struct extent_buffer *scratch_leaf = NULL;
        int err = -ENOMEM;

        path = btrfs_alloc_path();
        if (!path)
                goto out;
        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp)
                goto out;
        scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
        if (!scratch_leaf)
                goto out;

        err = 0;
        /* one leaf per iteration; err == 0 means more leafs remain */
        while (!err) {
                trans = btrfs_start_transaction(fs_info->fs_root, 0);
                if (IS_ERR(trans)) {
                        err = PTR_ERR(trans);
                        break;
                }
                /* quota may have been disabled while we were scanning */
                if (!fs_info->quota_enabled) {
                        err = -EINTR;
                } else {
                        err = qgroup_rescan_leaf(fs_info, path, trans,
                                                 tmp, scratch_leaf);
                }
                /*
                 * err > 0: the rescan finished; commit so the final qgroup
                 * state and progress key hit disk.  Otherwise just drop the
                 * handle (err == 0 continues, err < 0 aborts the loop).
                 */
                if (err > 0)
                        btrfs_commit_transaction(trans, fs_info->fs_root);
                else
                        btrfs_end_transaction(trans, fs_info->fs_root);
        }

out:
        kfree(scratch_leaf);
        ulist_free(tmp);
        btrfs_free_path(path);

        mutex_lock(&fs_info->qgroup_rescan_lock);
        fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

        /* err == 2: rescan completed and may clear the inconsistent state */
        if (err == 2 &&
            fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        } else if (err < 0) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        }
        mutex_unlock(&fs_info->qgroup_rescan_lock);

        if (err >= 0) {
                btrfs_info(fs_info, "qgroup scan completed%s",
                        err == 2 ? " (inconsistency flag cleared)" : "");
        } else {
                btrfs_err(fs_info, "qgroup scan failed with %d", err);
        }

        /* wake everyone blocked in btrfs_qgroup_wait_for_completion() */
        complete_all(&fs_info->qgroup_rescan_completion);
}
2051
2052/*
2053 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2054 * memory required for the rescan context.
2055 */
2056static int
2057qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2058                   int init_flags)
2059{
2060        int ret = 0;
2061
2062        if (!init_flags &&
2063            (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2064             !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2065                ret = -EINVAL;
2066                goto err;
2067        }
2068
2069        mutex_lock(&fs_info->qgroup_rescan_lock);
2070        spin_lock(&fs_info->qgroup_lock);
2071
2072        if (init_flags) {
2073                if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2074                        ret = -EINPROGRESS;
2075                else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2076                        ret = -EINVAL;
2077
2078                if (ret) {
2079                        spin_unlock(&fs_info->qgroup_lock);
2080                        mutex_unlock(&fs_info->qgroup_rescan_lock);
2081                        goto err;
2082                }
2083
2084                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2085        }
2086
2087        memset(&fs_info->qgroup_rescan_progress, 0,
2088                sizeof(fs_info->qgroup_rescan_progress));
2089        fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2090
2091        spin_unlock(&fs_info->qgroup_lock);
2092        mutex_unlock(&fs_info->qgroup_rescan_lock);
2093
2094        init_completion(&fs_info->qgroup_rescan_completion);
2095
2096        memset(&fs_info->qgroup_rescan_work, 0,
2097               sizeof(fs_info->qgroup_rescan_work));
2098        btrfs_init_work(&fs_info->qgroup_rescan_work,
2099                        btrfs_qgroup_rescan_worker, NULL, NULL);
2100
2101        if (ret) {
2102err:
2103                btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2104                return ret;
2105        }
2106
2107        return 0;
2108}
2109
2110static void
2111qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2112{
2113        struct rb_node *n;
2114        struct btrfs_qgroup *qgroup;
2115
2116        spin_lock(&fs_info->qgroup_lock);
2117        /* clear all current qgroup tracking information */
2118        for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2119                qgroup = rb_entry(n, struct btrfs_qgroup, node);
2120                qgroup->rfer = 0;
2121                qgroup->rfer_cmpr = 0;
2122                qgroup->excl = 0;
2123                qgroup->excl_cmpr = 0;
2124        }
2125        spin_unlock(&fs_info->qgroup_lock);
2126}
2127
2128int
2129btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2130{
2131        int ret = 0;
2132        struct btrfs_trans_handle *trans;
2133
2134        ret = qgroup_rescan_init(fs_info, 0, 1);
2135        if (ret)
2136                return ret;
2137
2138        /*
2139         * We have set the rescan_progress to 0, which means no more
2140         * delayed refs will be accounted by btrfs_qgroup_account_ref.
2141         * However, btrfs_qgroup_account_ref may be right after its call
2142         * to btrfs_find_all_roots, in which case it would still do the
2143         * accounting.
2144         * To solve this, we're committing the transaction, which will
2145         * ensure we run all delayed refs and only after that, we are
2146         * going to clear all tracking information for a clean start.
2147         */
2148
2149        trans = btrfs_join_transaction(fs_info->fs_root);
2150        if (IS_ERR(trans)) {
2151                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2152                return PTR_ERR(trans);
2153        }
2154        ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2155        if (ret) {
2156                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2157                return ret;
2158        }
2159
2160        qgroup_rescan_zero_tracking(fs_info);
2161
2162        btrfs_queue_work(fs_info->qgroup_rescan_workers,
2163                         &fs_info->qgroup_rescan_work);
2164
2165        return 0;
2166}
2167
2168int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2169{
2170        int running;
2171        int ret = 0;
2172
2173        mutex_lock(&fs_info->qgroup_rescan_lock);
2174        spin_lock(&fs_info->qgroup_lock);
2175        running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2176        spin_unlock(&fs_info->qgroup_lock);
2177        mutex_unlock(&fs_info->qgroup_rescan_lock);
2178
2179        if (running)
2180                ret = wait_for_completion_interruptible(
2181                                        &fs_info->qgroup_rescan_completion);
2182
2183        return ret;
2184}
2185
2186/*
2187 * this is only called from open_ctree where we're still single threaded, thus
2188 * locking is omitted here.
2189 */
2190void
2191btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2192{
2193        if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2194                btrfs_queue_work(fs_info->qgroup_rescan_workers,
2195                                 &fs_info->qgroup_rescan_work);
2196}
2197