linux/fs/btrfs/qgroup.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2011 STRATO.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/sched.h>
  20#include <linux/pagemap.h>
  21#include <linux/writeback.h>
  22#include <linux/blkdev.h>
  23#include <linux/rbtree.h>
  24#include <linux/slab.h>
  25#include <linux/workqueue.h>
  26#include <linux/btrfs.h>
  27
  28#include "ctree.h"
  29#include "transaction.h"
  30#include "disk-io.h"
  31#include "locking.h"
  32#include "ulist.h"
  33#include "backref.h"
  34#include "extent_io.h"
  35
  36/* TODO XXX FIXME
  37 *  - subvol delete -> delete when ref goes to 0? delete limits also?
  38 *  - reorganize keys
  39 *  - compressed
  40 *  - sync
  41 *  - copy also limits on subvol creation
  42 *  - limit
   43 *  - caches for ulists
  44 *  - performance benchmarks
  45 *  - check all ioctl parameters
  46 */
  47
/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;	/* id of this qgroup, key in fs_info->qgroup_tree */

	/*
	 * state
	 * usage counters, loaded from the on-disk BTRFS_QGROUP_INFO_KEY item
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits
	 * loaded from the on-disk BTRFS_QGROUP_LIMIT_KEY item
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;      /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 tag;
	u64 refcnt;
};
  90
/*
 * glue structure to represent the relations between qgroups. One instance
 * describes a single member -> parent relation and is linked into two lists
 * at once: the member's ->groups list and the parent's ->members list.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;	/* entry in member->groups */
	struct list_head next_member;	/* entry in group->members */
	struct btrfs_qgroup *group;	/* the parent group */
	struct btrfs_qgroup *member;	/* the member group */
};
 100
 101static int
 102qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
 103                   int init_flags);
 104static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
 105
 106/* must be called with qgroup_ioctl_lock held */
 107static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
 108                                           u64 qgroupid)
 109{
 110        struct rb_node *n = fs_info->qgroup_tree.rb_node;
 111        struct btrfs_qgroup *qgroup;
 112
 113        while (n) {
 114                qgroup = rb_entry(n, struct btrfs_qgroup, node);
 115                if (qgroup->qgroupid < qgroupid)
 116                        n = n->rb_left;
 117                else if (qgroup->qgroupid > qgroupid)
 118                        n = n->rb_right;
 119                else
 120                        return qgroup;
 121        }
 122        return NULL;
 123}
 124
 125/* must be called with qgroup_lock held */
 126static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
 127                                          u64 qgroupid)
 128{
 129        struct rb_node **p = &fs_info->qgroup_tree.rb_node;
 130        struct rb_node *parent = NULL;
 131        struct btrfs_qgroup *qgroup;
 132
 133        while (*p) {
 134                parent = *p;
 135                qgroup = rb_entry(parent, struct btrfs_qgroup, node);
 136
 137                if (qgroup->qgroupid < qgroupid)
 138                        p = &(*p)->rb_left;
 139                else if (qgroup->qgroupid > qgroupid)
 140                        p = &(*p)->rb_right;
 141                else
 142                        return qgroup;
 143        }
 144
 145        qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
 146        if (!qgroup)
 147                return ERR_PTR(-ENOMEM);
 148
 149        qgroup->qgroupid = qgroupid;
 150        INIT_LIST_HEAD(&qgroup->groups);
 151        INIT_LIST_HEAD(&qgroup->members);
 152        INIT_LIST_HEAD(&qgroup->dirty);
 153
 154        rb_link_node(&qgroup->node, parent, p);
 155        rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
 156
 157        return qgroup;
 158}
 159
 160/* must be called with qgroup_lock held */
 161static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 162{
 163        struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
 164        struct btrfs_qgroup_list *list;
 165
 166        if (!qgroup)
 167                return -ENOENT;
 168
 169        rb_erase(&qgroup->node, &fs_info->qgroup_tree);
 170        list_del(&qgroup->dirty);
 171
 172        while (!list_empty(&qgroup->groups)) {
 173                list = list_first_entry(&qgroup->groups,
 174                                        struct btrfs_qgroup_list, next_group);
 175                list_del(&list->next_group);
 176                list_del(&list->next_member);
 177                kfree(list);
 178        }
 179
 180        while (!list_empty(&qgroup->members)) {
 181                list = list_first_entry(&qgroup->members,
 182                                        struct btrfs_qgroup_list, next_member);
 183                list_del(&list->next_group);
 184                list_del(&list->next_member);
 185                kfree(list);
 186        }
 187        kfree(qgroup);
 188
 189        return 0;
 190}
 191
 192/* must be called with qgroup_lock held */
 193static int add_relation_rb(struct btrfs_fs_info *fs_info,
 194                           u64 memberid, u64 parentid)
 195{
 196        struct btrfs_qgroup *member;
 197        struct btrfs_qgroup *parent;
 198        struct btrfs_qgroup_list *list;
 199
 200        member = find_qgroup_rb(fs_info, memberid);
 201        parent = find_qgroup_rb(fs_info, parentid);
 202        if (!member || !parent)
 203                return -ENOENT;
 204
 205        list = kzalloc(sizeof(*list), GFP_ATOMIC);
 206        if (!list)
 207                return -ENOMEM;
 208
 209        list->group = parent;
 210        list->member = member;
 211        list_add_tail(&list->next_group, &member->groups);
 212        list_add_tail(&list->next_member, &parent->members);
 213
 214        return 0;
 215}
 216
 217/* must be called with qgroup_lock held */
 218static int del_relation_rb(struct btrfs_fs_info *fs_info,
 219                           u64 memberid, u64 parentid)
 220{
 221        struct btrfs_qgroup *member;
 222        struct btrfs_qgroup *parent;
 223        struct btrfs_qgroup_list *list;
 224
 225        member = find_qgroup_rb(fs_info, memberid);
 226        parent = find_qgroup_rb(fs_info, parentid);
 227        if (!member || !parent)
 228                return -ENOENT;
 229
 230        list_for_each_entry(list, &member->groups, next_group) {
 231                if (list->group == parent) {
 232                        list_del(&list->next_group);
 233                        list_del(&list->next_member);
 234                        kfree(list);
 235                        return 0;
 236                }
 237        }
 238        return -ENOENT;
 239}
 240
 241/*
 242 * The full config is read in one go, only called from open_ctree()
 243 * It doesn't use any locking, as at this point we're still single-threaded
 244 */
 245int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 246{
 247        struct btrfs_key key;
 248        struct btrfs_key found_key;
 249        struct btrfs_root *quota_root = fs_info->quota_root;
 250        struct btrfs_path *path = NULL;
 251        struct extent_buffer *l;
 252        int slot;
 253        int ret = 0;
 254        u64 flags = 0;
 255        u64 rescan_progress = 0;
 256
 257        if (!fs_info->quota_enabled)
 258                return 0;
 259
 260        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
 261        if (!fs_info->qgroup_ulist) {
 262                ret = -ENOMEM;
 263                goto out;
 264        }
 265
 266        path = btrfs_alloc_path();
 267        if (!path) {
 268                ret = -ENOMEM;
 269                goto out;
 270        }
 271
 272        /* default this to quota off, in case no status key is found */
 273        fs_info->qgroup_flags = 0;
 274
 275        /*
 276         * pass 1: read status, all qgroup infos and limits
 277         */
 278        key.objectid = 0;
 279        key.type = 0;
 280        key.offset = 0;
 281        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
 282        if (ret)
 283                goto out;
 284
 285        while (1) {
 286                struct btrfs_qgroup *qgroup;
 287
 288                slot = path->slots[0];
 289                l = path->nodes[0];
 290                btrfs_item_key_to_cpu(l, &found_key, slot);
 291
 292                if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
 293                        struct btrfs_qgroup_status_item *ptr;
 294
 295                        ptr = btrfs_item_ptr(l, slot,
 296                                             struct btrfs_qgroup_status_item);
 297
 298                        if (btrfs_qgroup_status_version(l, ptr) !=
 299                            BTRFS_QGROUP_STATUS_VERSION) {
 300                                printk(KERN_ERR
 301                                 "btrfs: old qgroup version, quota disabled\n");
 302                                goto out;
 303                        }
 304                        if (btrfs_qgroup_status_generation(l, ptr) !=
 305                            fs_info->generation) {
 306                                flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 307                                printk(KERN_ERR
 308                                        "btrfs: qgroup generation mismatch, "
 309                                        "marked as inconsistent\n");
 310                        }
 311                        fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
 312                                                                          ptr);
 313                        rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
 314                        goto next1;
 315                }
 316
 317                if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
 318                    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
 319                        goto next1;
 320
 321                qgroup = find_qgroup_rb(fs_info, found_key.offset);
 322                if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
 323                    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
 324                        printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
 325                        flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 326                }
 327                if (!qgroup) {
 328                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
 329                        if (IS_ERR(qgroup)) {
 330                                ret = PTR_ERR(qgroup);
 331                                goto out;
 332                        }
 333                }
 334                switch (found_key.type) {
 335                case BTRFS_QGROUP_INFO_KEY: {
 336                        struct btrfs_qgroup_info_item *ptr;
 337
 338                        ptr = btrfs_item_ptr(l, slot,
 339                                             struct btrfs_qgroup_info_item);
 340                        qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
 341                        qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
 342                        qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
 343                        qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
 344                        /* generation currently unused */
 345                        break;
 346                }
 347                case BTRFS_QGROUP_LIMIT_KEY: {
 348                        struct btrfs_qgroup_limit_item *ptr;
 349
 350                        ptr = btrfs_item_ptr(l, slot,
 351                                             struct btrfs_qgroup_limit_item);
 352                        qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
 353                        qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
 354                        qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
 355                        qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
 356                        qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
 357                        break;
 358                }
 359                }
 360next1:
 361                ret = btrfs_next_item(quota_root, path);
 362                if (ret < 0)
 363                        goto out;
 364                if (ret)
 365                        break;
 366        }
 367        btrfs_release_path(path);
 368
 369        /*
 370         * pass 2: read all qgroup relations
 371         */
 372        key.objectid = 0;
 373        key.type = BTRFS_QGROUP_RELATION_KEY;
 374        key.offset = 0;
 375        ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
 376        if (ret)
 377                goto out;
 378        while (1) {
 379                slot = path->slots[0];
 380                l = path->nodes[0];
 381                btrfs_item_key_to_cpu(l, &found_key, slot);
 382
 383                if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
 384                        goto next2;
 385
 386                if (found_key.objectid > found_key.offset) {
 387                        /* parent <- member, not needed to build config */
 388                        /* FIXME should we omit the key completely? */
 389                        goto next2;
 390                }
 391
 392                ret = add_relation_rb(fs_info, found_key.objectid,
 393                                      found_key.offset);
 394                if (ret == -ENOENT) {
 395                        printk(KERN_WARNING
 396                                "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
 397                                (unsigned long long)found_key.objectid,
 398                                (unsigned long long)found_key.offset);
 399                        ret = 0;        /* ignore the error */
 400                }
 401                if (ret)
 402                        goto out;
 403next2:
 404                ret = btrfs_next_item(quota_root, path);
 405                if (ret < 0)
 406                        goto out;
 407                if (ret)
 408                        break;
 409        }
 410out:
 411        fs_info->qgroup_flags |= flags;
 412        if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
 413                fs_info->quota_enabled = 0;
 414                fs_info->pending_quota_state = 0;
 415        } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
 416                   ret >= 0) {
 417                ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
 418        }
 419        btrfs_free_path(path);
 420
 421        if (ret < 0) {
 422                ulist_free(fs_info->qgroup_ulist);
 423                fs_info->qgroup_ulist = NULL;
 424                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 425        }
 426
 427        return ret < 0 ? ret : 0;
 428}
 429
 430/*
 431 * This is only called from close_ctree() or open_ctree(), both in single-
  432 * threaded paths. Clean up the in-memory structures. No locking needed.
 433 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;

	/* tear down every qgroup left in the rbtree */
	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);

		/* free relations where this qgroup is the member */
		while (!list_empty(&qgroup->groups)) {
			list = list_first_entry(&qgroup->groups,
						struct btrfs_qgroup_list,
						next_group);
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
		}

		/* free relations where this qgroup is the parent */
		while (!list_empty(&qgroup->members)) {
			list = list_first_entry(&qgroup->members,
						struct btrfs_qgroup_list,
						next_member);
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
		}
		kfree(qgroup);
	}
	ulist_free(fs_info->qgroup_ulist);
}
 465
 466static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
 467                                    struct btrfs_root *quota_root,
 468                                    u64 src, u64 dst)
 469{
 470        int ret;
 471        struct btrfs_path *path;
 472        struct btrfs_key key;
 473
 474        path = btrfs_alloc_path();
 475        if (!path)
 476                return -ENOMEM;
 477
 478        key.objectid = src;
 479        key.type = BTRFS_QGROUP_RELATION_KEY;
 480        key.offset = dst;
 481
 482        ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
 483
 484        btrfs_mark_buffer_dirty(path->nodes[0]);
 485
 486        btrfs_free_path(path);
 487        return ret;
 488}
 489
 490static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
 491                                    struct btrfs_root *quota_root,
 492                                    u64 src, u64 dst)
 493{
 494        int ret;
 495        struct btrfs_path *path;
 496        struct btrfs_key key;
 497
 498        path = btrfs_alloc_path();
 499        if (!path)
 500                return -ENOMEM;
 501
 502        key.objectid = src;
 503        key.type = BTRFS_QGROUP_RELATION_KEY;
 504        key.offset = dst;
 505
 506        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 507        if (ret < 0)
 508                goto out;
 509
 510        if (ret > 0) {
 511                ret = -ENOENT;
 512                goto out;
 513        }
 514
 515        ret = btrfs_del_item(trans, quota_root, path);
 516out:
 517        btrfs_free_path(path);
 518        return ret;
 519}
 520
 521static int add_qgroup_item(struct btrfs_trans_handle *trans,
 522                           struct btrfs_root *quota_root, u64 qgroupid)
 523{
 524        int ret;
 525        struct btrfs_path *path;
 526        struct btrfs_qgroup_info_item *qgroup_info;
 527        struct btrfs_qgroup_limit_item *qgroup_limit;
 528        struct extent_buffer *leaf;
 529        struct btrfs_key key;
 530
 531        path = btrfs_alloc_path();
 532        if (!path)
 533                return -ENOMEM;
 534
 535        key.objectid = 0;
 536        key.type = BTRFS_QGROUP_INFO_KEY;
 537        key.offset = qgroupid;
 538
 539        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 540                                      sizeof(*qgroup_info));
 541        if (ret)
 542                goto out;
 543
 544        leaf = path->nodes[0];
 545        qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
 546                                 struct btrfs_qgroup_info_item);
 547        btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
 548        btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
 549        btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
 550        btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
 551        btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
 552
 553        btrfs_mark_buffer_dirty(leaf);
 554
 555        btrfs_release_path(path);
 556
 557        key.type = BTRFS_QGROUP_LIMIT_KEY;
 558        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 559                                      sizeof(*qgroup_limit));
 560        if (ret)
 561                goto out;
 562
 563        leaf = path->nodes[0];
 564        qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
 565                                  struct btrfs_qgroup_limit_item);
 566        btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
 567        btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
 568        btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
 569        btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
 570        btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
 571
 572        btrfs_mark_buffer_dirty(leaf);
 573
 574        ret = 0;
 575out:
 576        btrfs_free_path(path);
 577        return ret;
 578}
 579
 580static int del_qgroup_item(struct btrfs_trans_handle *trans,
 581                           struct btrfs_root *quota_root, u64 qgroupid)
 582{
 583        int ret;
 584        struct btrfs_path *path;
 585        struct btrfs_key key;
 586
 587        path = btrfs_alloc_path();
 588        if (!path)
 589                return -ENOMEM;
 590
 591        key.objectid = 0;
 592        key.type = BTRFS_QGROUP_INFO_KEY;
 593        key.offset = qgroupid;
 594        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 595        if (ret < 0)
 596                goto out;
 597
 598        if (ret > 0) {
 599                ret = -ENOENT;
 600                goto out;
 601        }
 602
 603        ret = btrfs_del_item(trans, quota_root, path);
 604        if (ret)
 605                goto out;
 606
 607        btrfs_release_path(path);
 608
 609        key.type = BTRFS_QGROUP_LIMIT_KEY;
 610        ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
 611        if (ret < 0)
 612                goto out;
 613
 614        if (ret > 0) {
 615                ret = -ENOENT;
 616                goto out;
 617        }
 618
 619        ret = btrfs_del_item(trans, quota_root, path);
 620
 621out:
 622        btrfs_free_path(path);
 623        return ret;
 624}
 625
 626static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
 627                                    struct btrfs_root *root, u64 qgroupid,
 628                                    u64 flags, u64 max_rfer, u64 max_excl,
 629                                    u64 rsv_rfer, u64 rsv_excl)
 630{
 631        struct btrfs_path *path;
 632        struct btrfs_key key;
 633        struct extent_buffer *l;
 634        struct btrfs_qgroup_limit_item *qgroup_limit;
 635        int ret;
 636        int slot;
 637
 638        key.objectid = 0;
 639        key.type = BTRFS_QGROUP_LIMIT_KEY;
 640        key.offset = qgroupid;
 641
 642        path = btrfs_alloc_path();
 643        if (!path)
 644                return -ENOMEM;
 645
 646        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 647        if (ret > 0)
 648                ret = -ENOENT;
 649
 650        if (ret)
 651                goto out;
 652
 653        l = path->nodes[0];
 654        slot = path->slots[0];
 655        qgroup_limit = btrfs_item_ptr(l, path->slots[0],
 656                                      struct btrfs_qgroup_limit_item);
 657        btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
 658        btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
 659        btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
 660        btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
 661        btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
 662
 663        btrfs_mark_buffer_dirty(l);
 664
 665out:
 666        btrfs_free_path(path);
 667        return ret;
 668}
 669
 670static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
 671                                   struct btrfs_root *root,
 672                                   struct btrfs_qgroup *qgroup)
 673{
 674        struct btrfs_path *path;
 675        struct btrfs_key key;
 676        struct extent_buffer *l;
 677        struct btrfs_qgroup_info_item *qgroup_info;
 678        int ret;
 679        int slot;
 680
 681        key.objectid = 0;
 682        key.type = BTRFS_QGROUP_INFO_KEY;
 683        key.offset = qgroup->qgroupid;
 684
 685        path = btrfs_alloc_path();
 686        if (!path)
 687                return -ENOMEM;
 688
 689        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 690        if (ret > 0)
 691                ret = -ENOENT;
 692
 693        if (ret)
 694                goto out;
 695
 696        l = path->nodes[0];
 697        slot = path->slots[0];
 698        qgroup_info = btrfs_item_ptr(l, path->slots[0],
 699                                 struct btrfs_qgroup_info_item);
 700        btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
 701        btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
 702        btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
 703        btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
 704        btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
 705
 706        btrfs_mark_buffer_dirty(l);
 707
 708out:
 709        btrfs_free_path(path);
 710        return ret;
 711}
 712
 713static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
 714                                     struct btrfs_fs_info *fs_info,
 715                                    struct btrfs_root *root)
 716{
 717        struct btrfs_path *path;
 718        struct btrfs_key key;
 719        struct extent_buffer *l;
 720        struct btrfs_qgroup_status_item *ptr;
 721        int ret;
 722        int slot;
 723
 724        key.objectid = 0;
 725        key.type = BTRFS_QGROUP_STATUS_KEY;
 726        key.offset = 0;
 727
 728        path = btrfs_alloc_path();
 729        if (!path)
 730                return -ENOMEM;
 731
 732        ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 733        if (ret > 0)
 734                ret = -ENOENT;
 735
 736        if (ret)
 737                goto out;
 738
 739        l = path->nodes[0];
 740        slot = path->slots[0];
 741        ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
 742        btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
 743        btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
 744        btrfs_set_qgroup_status_rescan(l, ptr,
 745                                fs_info->qgroup_rescan_progress.objectid);
 746
 747        btrfs_mark_buffer_dirty(l);
 748
 749out:
 750        btrfs_free_path(path);
 751        return ret;
 752}
 753
/*
 * called with qgroup_lock held
 *
 * Empty the whole quota tree by repeatedly deleting all items of the
 * first leaf. Also clears pending_quota_state on the way out.
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	/* smallest possible key: always land on the first item */
	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		/* an empty leaf means the tree has been fully drained */
		if (!nr)
			break;
		/*
		 * delete the leaf one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	root->fs_info->pending_quota_state = 0;
	btrfs_free_path(path);
	return ret;
}
 802
 803int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 804                       struct btrfs_fs_info *fs_info)
 805{
 806        struct btrfs_root *quota_root;
 807        struct btrfs_root *tree_root = fs_info->tree_root;
 808        struct btrfs_path *path = NULL;
 809        struct btrfs_qgroup_status_item *ptr;
 810        struct extent_buffer *leaf;
 811        struct btrfs_key key;
 812        struct btrfs_key found_key;
 813        struct btrfs_qgroup *qgroup = NULL;
 814        int ret = 0;
 815        int slot;
 816
 817        mutex_lock(&fs_info->qgroup_ioctl_lock);
 818        if (fs_info->quota_root) {
 819                fs_info->pending_quota_state = 1;
 820                goto out;
 821        }
 822
 823        fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
 824        if (!fs_info->qgroup_ulist) {
 825                ret = -ENOMEM;
 826                goto out;
 827        }
 828
 829        /*
 830         * initially create the quota tree
 831         */
 832        quota_root = btrfs_create_tree(trans, fs_info,
 833                                       BTRFS_QUOTA_TREE_OBJECTID);
 834        if (IS_ERR(quota_root)) {
 835                ret =  PTR_ERR(quota_root);
 836                goto out;
 837        }
 838
 839        path = btrfs_alloc_path();
 840        if (!path) {
 841                ret = -ENOMEM;
 842                goto out_free_root;
 843        }
 844
 845        key.objectid = 0;
 846        key.type = BTRFS_QGROUP_STATUS_KEY;
 847        key.offset = 0;
 848
 849        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
 850                                      sizeof(*ptr));
 851        if (ret)
 852                goto out_free_path;
 853
 854        leaf = path->nodes[0];
 855        ptr = btrfs_item_ptr(leaf, path->slots[0],
 856                                 struct btrfs_qgroup_status_item);
 857        btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
 858        btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
 859        fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
 860                                BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 861        btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
 862        btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
 863
 864        btrfs_mark_buffer_dirty(leaf);
 865
 866        key.objectid = 0;
 867        key.type = BTRFS_ROOT_REF_KEY;
 868        key.offset = 0;
 869
 870        btrfs_release_path(path);
 871        ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
 872        if (ret > 0)
 873                goto out_add_root;
 874        if (ret < 0)
 875                goto out_free_path;
 876
 877
 878        while (1) {
 879                slot = path->slots[0];
 880                leaf = path->nodes[0];
 881                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 882
 883                if (found_key.type == BTRFS_ROOT_REF_KEY) {
 884                        ret = add_qgroup_item(trans, quota_root,
 885                                              found_key.offset);
 886                        if (ret)
 887                                goto out_free_path;
 888
 889                        qgroup = add_qgroup_rb(fs_info, found_key.offset);
 890                        if (IS_ERR(qgroup)) {
 891                                ret = PTR_ERR(qgroup);
 892                                goto out_free_path;
 893                        }
 894                }
 895                ret = btrfs_next_item(tree_root, path);
 896                if (ret < 0)
 897                        goto out_free_path;
 898                if (ret)
 899                        break;
 900        }
 901
 902out_add_root:
 903        btrfs_release_path(path);
 904        ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
 905        if (ret)
 906                goto out_free_path;
 907
 908        qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
 909        if (IS_ERR(qgroup)) {
 910                ret = PTR_ERR(qgroup);
 911                goto out_free_path;
 912        }
 913        spin_lock(&fs_info->qgroup_lock);
 914        fs_info->quota_root = quota_root;
 915        fs_info->pending_quota_state = 1;
 916        spin_unlock(&fs_info->qgroup_lock);
 917out_free_path:
 918        btrfs_free_path(path);
 919out_free_root:
 920        if (ret) {
 921                free_extent_buffer(quota_root->node);
 922                free_extent_buffer(quota_root->commit_root);
 923                kfree(quota_root);
 924        }
 925out:
 926        if (ret) {
 927                ulist_free(fs_info->qgroup_ulist);
 928                fs_info->qgroup_ulist = NULL;
 929        }
 930        mutex_unlock(&fs_info->qgroup_ioctl_lock);
 931        return ret;
 932}
 933
/*
 * Tear down quota accounting: detach the in-memory qgroup state and
 * remove the quota tree from disk.
 *
 * Returns 0 on success (including the no-op case where quotas were
 * never enabled) or a negative errno.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	/* nothing to do if quotas were never enabled */
	if (!fs_info->quota_root)
		goto out;
	/*
	 * Clear the enabled state and detach the quota root under the
	 * spinlock so concurrent accounting sees a consistent view.
	 */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	btrfs_free_qgroup_config(fs_info);
	spin_unlock(&fs_info->qgroup_lock);

	/*
	 * NOTE(review): quota_root cannot be NULL here -- the same field
	 * was tested above while holding qgroup_ioctl_lock -- so this
	 * check looks redundant; confirm before removing it.
	 */
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	/* empty the quota tree, then drop its root item from the tree root */
	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* free the tree block backing the (now empty) quota root */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
 979
 980static void qgroup_dirty(struct btrfs_fs_info *fs_info,
 981                         struct btrfs_qgroup *qgroup)
 982{
 983        if (list_empty(&qgroup->dirty))
 984                list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 985}
 986
/*
 * Make qgroup @src a member of qgroup @dst.
 *
 * The relation is stored on disk as two items, keyed (src, dst) and
 * (dst, src), and mirrored in the in-memory rb-tree.  Returns -EINVAL
 * when quotas are disabled or either qgroup is unknown, -EEXIST when
 * the relation is already present.
 */
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such qgroup relation exists already */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
	if (ret) {
		/* roll back the first item so both directions stay in sync */
		del_qgroup_relation_item(trans, quota_root, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(quota_root->fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1034
1035int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1036                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1037{
1038        struct btrfs_root *quota_root;
1039        struct btrfs_qgroup *parent;
1040        struct btrfs_qgroup *member;
1041        struct btrfs_qgroup_list *list;
1042        int ret = 0;
1043        int err;
1044
1045        mutex_lock(&fs_info->qgroup_ioctl_lock);
1046        quota_root = fs_info->quota_root;
1047        if (!quota_root) {
1048                ret = -EINVAL;
1049                goto out;
1050        }
1051
1052        member = find_qgroup_rb(fs_info, src);
1053        parent = find_qgroup_rb(fs_info, dst);
1054        if (!member || !parent) {
1055                ret = -EINVAL;
1056                goto out;
1057        }
1058
1059        /* check if such qgroup relation exist firstly */
1060        list_for_each_entry(list, &member->groups, next_group) {
1061                if (list->group == parent)
1062                        goto exist;
1063        }
1064        ret = -ENOENT;
1065        goto out;
1066exist:
1067        ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1068        err = del_qgroup_relation_item(trans, quota_root, dst, src);
1069        if (err && !ret)
1070                ret = err;
1071
1072        spin_lock(&fs_info->qgroup_lock);
1073        del_relation_rb(fs_info, src, dst);
1074        spin_unlock(&fs_info->qgroup_lock);
1075out:
1076        mutex_unlock(&fs_info->qgroup_ioctl_lock);
1077        return ret;
1078}
1079
1080int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1081                        struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1082{
1083        struct btrfs_root *quota_root;
1084        struct btrfs_qgroup *qgroup;
1085        int ret = 0;
1086
1087        mutex_lock(&fs_info->qgroup_ioctl_lock);
1088        quota_root = fs_info->quota_root;
1089        if (!quota_root) {
1090                ret = -EINVAL;
1091                goto out;
1092        }
1093        qgroup = find_qgroup_rb(fs_info, qgroupid);
1094        if (qgroup) {
1095                ret = -EEXIST;
1096                goto out;
1097        }
1098
1099        ret = add_qgroup_item(trans, quota_root, qgroupid);
1100        if (ret)
1101                goto out;
1102
1103        spin_lock(&fs_info->qgroup_lock);
1104        qgroup = add_qgroup_rb(fs_info, qgroupid);
1105        spin_unlock(&fs_info->qgroup_lock);
1106
1107        if (IS_ERR(qgroup))
1108                ret = PTR_ERR(qgroup);
1109out:
1110        mutex_unlock(&fs_info->qgroup_ioctl_lock);
1111        return ret;
1112}
1113
1114int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1115                        struct btrfs_fs_info *fs_info, u64 qgroupid)
1116{
1117        struct btrfs_root *quota_root;
1118        struct btrfs_qgroup *qgroup;
1119        int ret = 0;
1120
1121        mutex_lock(&fs_info->qgroup_ioctl_lock);
1122        quota_root = fs_info->quota_root;
1123        if (!quota_root) {
1124                ret = -EINVAL;
1125                goto out;
1126        }
1127
1128        qgroup = find_qgroup_rb(fs_info, qgroupid);
1129        if (!qgroup) {
1130                ret = -ENOENT;
1131                goto out;
1132        } else {
1133                /* check if there are no relations to this qgroup */
1134                if (!list_empty(&qgroup->groups) ||
1135                    !list_empty(&qgroup->members)) {
1136                        ret = -EBUSY;
1137                        goto out;
1138                }
1139        }
1140        ret = del_qgroup_item(trans, quota_root, qgroupid);
1141
1142        spin_lock(&fs_info->qgroup_lock);
1143        del_qgroup_rb(quota_root->fs_info, qgroupid);
1144        spin_unlock(&fs_info->qgroup_lock);
1145out:
1146        mutex_unlock(&fs_info->qgroup_ioctl_lock);
1147        return ret;
1148}
1149
/*
 * Apply the limits from @limit to qgroup @qgroupid, in the quota tree
 * and in the in-memory qgroup.  The in-memory copy is updated even
 * when the on-disk update failed; in that case the status is flagged
 * inconsistent so a later rescan can repair the mismatch.
 */
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}
	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);
	if (ret) {
		/* disk and memory will now disagree; mark for rescan */
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		printk(KERN_INFO "unable to update quota limit for %llu\n",
		       (unsigned long long)qgroupid);
	}

	spin_lock(&fs_info->qgroup_lock);
	qgroup->lim_flags = limit->flags;
	qgroup->max_rfer = limit->max_rfer;
	qgroup->max_excl = limit->max_excl;
	qgroup->rsv_rfer = limit->rsv_rfer;
	qgroup->rsv_excl = limit->rsv_excl;
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1191
1192/*
1193 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1194 * the modification into a list that's later used by btrfs_end_transaction to
1195 * pass the recorded modifications on to btrfs_qgroup_account_ref.
1196 */
1197int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1198                            struct btrfs_delayed_ref_node *node,
1199                            struct btrfs_delayed_extent_op *extent_op)
1200{
1201        struct qgroup_update *u;
1202
1203        BUG_ON(!trans->delayed_ref_elem.seq);
1204        u = kmalloc(sizeof(*u), GFP_NOFS);
1205        if (!u)
1206                return -ENOMEM;
1207
1208        u->node = node;
1209        u->extent_op = extent_op;
1210        list_add_tail(&u->list, &trans->qgroup_ref_list);
1211
1212        return 0;
1213}
1214
/*
 * Accounting step 1: for every root that referenced the extent in the
 * old state, visit the root's own qgroup and all its ancestor qgroups
 * exactly once and bump each visited qgroup's refcnt.
 *
 * refcnt is compared against the per-operation sequence @seq: a qgroup
 * first seen in this operation (refcnt < seq) is reset to seq + 1,
 * later visits from other roots just increment it.  Step 3 uses
 * refcnt - seq to tell how many of the old roots reach a qgroup.
 *
 * @tmp is caller-provided scratch space that de-duplicates the upward
 * walk per root.  Returns 0 or a negative errno from ulist_add.
 */
static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		/* roots without a qgroup are simply not accounted */
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
						/* XXX id not needed */
		ret = ulist_add(tmp, qg->qgroupid,
				(u64)(uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/* first visit in this accounting operation? */
			if (qg->refcnt < seq)
				qg->refcnt = seq + 1;
			else
				++qg->refcnt;

			/* queue all parent qgroups for the upward walk */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(u64)(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1260
/*
 * Accounting step 2: walk upwards from @qgroup, the qgroup of the root
 * the ref is being added to or removed from.  Qgroups not visited by
 * step 1 (refcnt < seq) get their referenced count adjusted by
 * sgn * num_bytes; when there were no old roots at all
 * (roots->nnodes == 0) the space is exclusive to this root and excl is
 * adjusted too.  Every visited qgroup is tagged with @seq so step 3
 * can skip it.
 *
 * @tmp is caller-provided scratch space for the upward walk.
 * Returns 0 or a negative errno from ulist_add.
 */
static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes,
				    struct btrfs_qgroup *qgroup)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_list *glist;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * num_bytes;
			qg->rfer_cmpr += sgn * num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * num_bytes;
				qg->excl_cmpr += sgn * num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		/* queue all parent qgroups for the upward walk */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}

	return 0;
}
1303
/*
 * Accounting step 3: walk upwards from each old root once more.
 * Qgroups already handled by step 2 (tag == seq) are skipped.  A
 * qgroup whose refcnt was bumped once per old root in step 1
 * (refcnt - seq == roots->nnodes) is reached from all old roots, and
 * its exclusive count is adjusted by -sgn * num_bytes.
 *
 * @tmp is caller-provided scratch space that de-duplicates the upward
 * walk per root.  Returns 0 or a negative errno from ulist_add.
 */
static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;

		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/* already adjusted by step 2 */
			if (qg->tag == seq)
				continue;

			/* visited from every old root -> exclusive change */
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * num_bytes;
				qg->excl_cmpr -= sgn * num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			/* queue all parent qgroups for the upward walk */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1352
/*
 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
 * from the fs. First, all roots referencing the extent are searched, and
 * then the space is accounted accordingly to the different roots. The
 * accounting algorithm works in 3 steps documented inline.
 *
 * Returns 0 without accounting when quotas are disabled, the ref does
 * not belong to an fs tree, the action is a head update, or the extent
 * lies beyond the rescan cursor (the rescan worker will pick it up).
 * Otherwise returns 0 on success or a negative errno.
 */
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info,
			     struct btrfs_delayed_ref_node *node,
			     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key ins;
	struct btrfs_root *quota_root;
	u64 ref_root;
	struct btrfs_qgroup *qgroup;
	struct ulist *roots = NULL;
	u64 seq;
	int ret = 0;
	int sgn;

	if (!fs_info->quota_enabled)
		return 0;

	BUG_ON(!fs_info->quota_root);

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	/* extract the root this delayed ref belongs to */
	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		struct btrfs_delayed_tree_ref *ref;
		ref = btrfs_delayed_node_to_tree_ref(node);
		ref_root = ref->root;
	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_delayed_data_ref *ref;
		ref = btrfs_delayed_node_to_data_ref(node);
		ref_root = ref->root;
	} else {
		BUG();
	}

	if (!is_fstree(ref_root)) {
		/*
		 * non-fs-trees are not being accounted
		 */
		return 0;
	}

	/* sign of the accounting delta and which tree state to look at */
	switch (node->action) {
	case BTRFS_ADD_DELAYED_REF:
	case BTRFS_ADD_DELAYED_EXTENT:
		sgn = 1;
		seq = btrfs_tree_mod_seq_prev(node->seq);
		break;
	case BTRFS_DROP_DELAYED_REF:
		sgn = -1;
		seq = node->seq;
		break;
	case BTRFS_UPDATE_DELAYED_HEAD:
		return 0;
	default:
		BUG();
	}

	/*
	 * extents at or beyond the rescan cursor are accounted by the
	 * rescan worker; don't account them twice here
	 */
	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			return 0;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * the delayed ref sequence number we pass depends on the direction of
	 * the operation. for add operations, we pass
	 * tree_mod_log_prev_seq(node->seq) to skip
	 * the delayed ref's current sequence number, because we need the state
	 * of the tree before the add operation. for delete operations, we pass
	 * (node->seq) to include the delayed ref's current sequence number,
	 * because we need the state of the tree after the delete operation.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
	if (ret < 0)
		return ret;

	spin_lock(&fs_info->qgroup_lock);

	/* quotas may have been disabled while we were searching roots */
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto unlock;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto unlock;

	/*
	 * step 1: for each old ref, visit all nodes once and inc refcnt
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	seq = fs_info->qgroup_seq;
	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
				       seq);
	if (ret)
		goto unlock;

	/*
	 * step 2: walk from the new root
	 */
	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes, qgroup);
	if (ret)
		goto unlock;

	/*
	 * step 3: walk again from old refs
	 */
	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes);
	if (ret)
		goto unlock;

unlock:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(roots);

	return ret;
}
1485
/*
 * called from commit_transaction. Writes all changed qgroups to disk.
 *
 * Also promotes pending_quota_state to quota_enabled and, when quotas
 * have just been switched on, kicks off the rescan worker.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;
	int start_rescan_worker = 0;

	if (!quota_root)
		goto out;

	/* quota was enabled in this transaction -> schedule a rescan */
	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
		start_rescan_worker = 1;

	fs_info->quota_enabled = fs_info->pending_quota_state;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		/* drop the spinlock around the tree update */
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	if (!ret && start_rescan_worker) {
		ret = qgroup_rescan_init(fs_info, 0, 1);
		if (!ret) {
			qgroup_rescan_zero_tracking(fs_info);
			btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
					   &fs_info->qgroup_rescan_work);
		}
		/* a failed rescan start is not fatal for the commit */
		ret = 0;
	}

out:

	return ret;
}
1541
/*
 * copy the accounting information between qgroups. This is necessary
 * when a snapshot or a subvolume is created. @srcid is the source
 * subvolume id (0 when creating a plain subvolume), @objectid the new
 * subvolume's id, and @inherit optionally lists qgroups the new
 * subvolume is added to plus (src, dst) pairs whose counts are copied.
 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_enabled)
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	if (inherit) {
		/*
		 * the inherit struct is followed by an array of qgroup
		 * ids: num_qgroups single ids, then num_ref_copies and
		 * num_excl_copies (src, dst) pairs. Validate that every
		 * listed qgroup exists before changing anything.
		 */
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
			if (!srcgroup) {
				ret = -EINVAL;
				goto out;
			}
			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		ret = update_qgroup_limit_item(trans, quota_root, objectid,
					       inherit->lim.flags,
					       inherit->lim.max_rfer,
					       inherit->lim.max_excl,
					       inherit->lim.rsv_rfer,
					       inherit->lim.rsv_excl);
		if (ret)
			goto out;
	}

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;
		int srcroot_level;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		/* size of one tree node at the source root's level */
		rcu_read_lock();
		srcroot_level = btrfs_header_level(srcroot->node);
		level_size = btrfs_level_size(srcroot, srcroot_level);
		rcu_read_unlock();
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i) {
			/* relation items are stored in both directions */
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret)
				goto out;
			++i_qgroups;
		}
	}


	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		/*
		 * NOTE(review): a missing source qgroup silently succeeds
		 * here (ret stays 0) -- confirm that this is intended.
		 */
		if (!srcgroup)
			goto unlock;
		/*
		 * The snapshot shares everything with the source except
		 * its root node; level_size presumably accounts for that
		 * freshly written node -- TODO confirm.
		 */
		dstgroup->rfer = srcgroup->rfer - level_size;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;
		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	/* mirror the inherited relations in the in-memory rb-tree */
	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		ret = add_relation_rb(quota_root->fs_info, objectid,
				      *i_qgroups);
		if (ret)
			goto unlock;
		++i_qgroups;
	}

	/* copy referenced counts for the requested (src, dst) pairs */
	for (i = 0; i <  inherit->num_ref_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
		i_qgroups += 2;
	}
	/* copy exclusive counts for the requested (src, dst) pairs */
	for (i = 0; i <  inherit->num_excl_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
		i_qgroups += 2;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1710
1711/*
1712 * reserve some space for a qgroup and all its parents. The reservation takes
1713 * place with start_transaction or dealloc_reserve, similar to ENOSPC
1714 * accounting. If not enough space is available, EDQUOT is returned.
1715 * We assume that the requested space is new for all qgroups.
1716 */
1717int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1718{
1719        struct btrfs_root *quota_root;
1720        struct btrfs_qgroup *qgroup;
1721        struct btrfs_fs_info *fs_info = root->fs_info;
1722        u64 ref_root = root->root_key.objectid;
1723        int ret = 0;
1724        struct ulist_node *unode;
1725        struct ulist_iterator uiter;
1726
1727        if (!is_fstree(ref_root))
1728                return 0;
1729
1730        if (num_bytes == 0)
1731                return 0;
1732
1733        spin_lock(&fs_info->qgroup_lock);
1734        quota_root = fs_info->quota_root;
1735        if (!quota_root)
1736                goto out;
1737
1738        qgroup = find_qgroup_rb(fs_info, ref_root);
1739        if (!qgroup)
1740                goto out;
1741
1742        /*
1743         * in a first step, we check all affected qgroups if any limits would
1744         * be exceeded
1745         */
1746        ulist_reinit(fs_info->qgroup_ulist);
1747        ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
1748                        (uintptr_t)qgroup, GFP_ATOMIC);
1749        if (ret < 0)
1750                goto out;
1751        ULIST_ITER_INIT(&uiter);
1752        while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1753                struct btrfs_qgroup *qg;
1754                struct btrfs_qgroup_list *glist;
1755
1756                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1757
1758                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1759                    qg->reserved + (s64)qg->rfer + num_bytes >
1760                    qg->max_rfer) {
1761                        ret = -EDQUOT;
1762                        goto out;
1763                }
1764
1765                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1766                    qg->reserved + (s64)qg->excl + num_bytes >
1767                    qg->max_excl) {
1768                        ret = -EDQUOT;
1769                        goto out;
1770                }
1771
1772                list_for_each_entry(glist, &qg->groups, next_group) {
1773                        ret = ulist_add(fs_info->qgroup_ulist,
1774                                        glist->group->qgroupid,
1775                                        (uintptr_t)glist->group, GFP_ATOMIC);
1776                        if (ret < 0)
1777                                goto out;
1778                }
1779        }
1780        ret = 0;
1781        /*
1782         * no limits exceeded, now record the reservation into all qgroups
1783         */
1784        ULIST_ITER_INIT(&uiter);
1785        while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1786                struct btrfs_qgroup *qg;
1787
1788                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1789
1790                qg->reserved += num_bytes;
1791        }
1792
1793out:
1794        spin_unlock(&fs_info->qgroup_lock);
1795        return ret;
1796}
1797
1798void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1799{
1800        struct btrfs_root *quota_root;
1801        struct btrfs_qgroup *qgroup;
1802        struct btrfs_fs_info *fs_info = root->fs_info;
1803        struct ulist_node *unode;
1804        struct ulist_iterator uiter;
1805        u64 ref_root = root->root_key.objectid;
1806        int ret = 0;
1807
1808        if (!is_fstree(ref_root))
1809                return;
1810
1811        if (num_bytes == 0)
1812                return;
1813
1814        spin_lock(&fs_info->qgroup_lock);
1815
1816        quota_root = fs_info->quota_root;
1817        if (!quota_root)
1818                goto out;
1819
1820        qgroup = find_qgroup_rb(fs_info, ref_root);
1821        if (!qgroup)
1822                goto out;
1823
1824        ulist_reinit(fs_info->qgroup_ulist);
1825        ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
1826                        (uintptr_t)qgroup, GFP_ATOMIC);
1827        if (ret < 0)
1828                goto out;
1829        ULIST_ITER_INIT(&uiter);
1830        while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1831                struct btrfs_qgroup *qg;
1832                struct btrfs_qgroup_list *glist;
1833
1834                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1835
1836                qg->reserved -= num_bytes;
1837
1838                list_for_each_entry(glist, &qg->groups, next_group) {
1839                        ret = ulist_add(fs_info->qgroup_ulist,
1840                                        glist->group->qgroupid,
1841                                        (uintptr_t)glist->group, GFP_ATOMIC);
1842                        if (ret < 0)
1843                                goto out;
1844                }
1845        }
1846
1847out:
1848        spin_unlock(&fs_info->qgroup_lock);
1849}
1850
1851void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1852{
1853        if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1854                return;
1855        pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
1856                trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1857                (u32)(trans->delayed_ref_elem.seq >> 32),
1858                (u32)trans->delayed_ref_elem.seq);
1859        BUG();
1860}
1861
1862/*
1863 * returns < 0 on error, 0 when more leafs are to be scanned.
1864 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1865 */
1866static int
1867qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1868                   struct btrfs_trans_handle *trans, struct ulist *tmp,
1869                   struct extent_buffer *scratch_leaf)
1870{
1871        struct btrfs_key found;
1872        struct ulist *roots = NULL;
1873        struct ulist_node *unode;
1874        struct ulist_iterator uiter;
1875        struct seq_list tree_mod_seq_elem = {};
1876        u64 seq;
1877        int slot;
1878        int ret;
1879
1880        path->leave_spinning = 1;
1881        mutex_lock(&fs_info->qgroup_rescan_lock);
1882        ret = btrfs_search_slot_for_read(fs_info->extent_root,
1883                                         &fs_info->qgroup_rescan_progress,
1884                                         path, 1, 0);
1885
1886        pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1887                 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
1888                 fs_info->qgroup_rescan_progress.type,
1889                 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
1890                 ret);
1891
1892        if (ret) {
1893                /*
1894                 * The rescan is about to end, we will not be scanning any
1895                 * further blocks. We cannot unset the RESCAN flag here, because
1896                 * we want to commit the transaction if everything went well.
1897                 * To make the live accounting work in this phase, we set our
1898                 * scan progress pointer such that every real extent objectid
1899                 * will be smaller.
1900                 */
1901                fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1902                btrfs_release_path(path);
1903                mutex_unlock(&fs_info->qgroup_rescan_lock);
1904                return ret;
1905        }
1906
1907        btrfs_item_key_to_cpu(path->nodes[0], &found,
1908                              btrfs_header_nritems(path->nodes[0]) - 1);
1909        fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1910
1911        btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1912        memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1913        slot = path->slots[0];
1914        btrfs_release_path(path);
1915        mutex_unlock(&fs_info->qgroup_rescan_lock);
1916
1917        for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1918                btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1919                if (found.type != BTRFS_EXTENT_ITEM_KEY)
1920                        continue;
1921                ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1922                                           tree_mod_seq_elem.seq, &roots);
1923                if (ret < 0)
1924                        goto out;
1925                spin_lock(&fs_info->qgroup_lock);
1926                seq = fs_info->qgroup_seq;
1927                fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1928
1929                ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1930                if (ret) {
1931                        spin_unlock(&fs_info->qgroup_lock);
1932                        ulist_free(roots);
1933                        goto out;
1934                }
1935
1936                /*
1937                 * step2 of btrfs_qgroup_account_ref works from a single root,
1938                 * we're doing all at once here.
1939                 */
1940                ulist_reinit(tmp);
1941                ULIST_ITER_INIT(&uiter);
1942                while ((unode = ulist_next(roots, &uiter))) {
1943                        struct btrfs_qgroup *qg;
1944
1945                        qg = find_qgroup_rb(fs_info, unode->val);
1946                        if (!qg)
1947                                continue;
1948
1949                        ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1950                                        GFP_ATOMIC);
1951                        if (ret < 0) {
1952                                spin_unlock(&fs_info->qgroup_lock);
1953                                ulist_free(roots);
1954                                goto out;
1955                        }
1956                }
1957
1958                /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1959                ULIST_ITER_INIT(&uiter);
1960                while ((unode = ulist_next(tmp, &uiter))) {
1961                        struct btrfs_qgroup *qg;
1962                        struct btrfs_qgroup_list *glist;
1963
1964                        qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1965                        qg->rfer += found.offset;
1966                        qg->rfer_cmpr += found.offset;
1967                        WARN_ON(qg->tag >= seq);
1968                        if (qg->refcnt - seq == roots->nnodes) {
1969                                qg->excl += found.offset;
1970                                qg->excl_cmpr += found.offset;
1971                        }
1972                        qgroup_dirty(fs_info, qg);
1973
1974                        list_for_each_entry(glist, &qg->groups, next_group) {
1975                                ret = ulist_add(tmp, glist->group->qgroupid,
1976                                                (uintptr_t)glist->group,
1977                                                GFP_ATOMIC);
1978                                if (ret < 0) {
1979                                        spin_unlock(&fs_info->qgroup_lock);
1980                                        ulist_free(roots);
1981                                        goto out;
1982                                }
1983                        }
1984                }
1985
1986                spin_unlock(&fs_info->qgroup_lock);
1987                ulist_free(roots);
1988                ret = 0;
1989        }
1990
1991out:
1992        btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1993
1994        return ret;
1995}
1996
/*
 * Rescan worker: scans one extent-tree leaf per transaction via
 * qgroup_rescan_leaf() until it reports completion (> 0) or an error (< 0),
 * then publishes the final rescan state and wakes any waiters.
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *tmp = NULL;
	struct extent_buffer *scratch_leaf = NULL;
	int err = -ENOMEM;

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		goto out;
	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
	if (!scratch_leaf)
		goto out;

	err = 0;
	while (!err) {
		/* one transaction per leaf keeps each commit small */
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!fs_info->quota_enabled) {
			/* quotas were disabled while the rescan was running */
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans,
						 tmp, scratch_leaf);
		}
		/* commit the final pass so the finished state is persisted */
		if (err > 0)
			btrfs_commit_transaction(trans, fs_info->fs_root);
		else
			btrfs_end_transaction(trans, fs_info->fs_root);
	}

out:
	/* all three cleanup helpers accept NULL */
	kfree(scratch_leaf);
	ulist_free(tmp);
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	/*
	 * err == 2 means done and, per qgroup_rescan_leaf's contract, the
	 * inconsistent state was resolved; any error marks the counters
	 * inconsistent. NOTE(review): no path in the visible part of
	 * qgroup_rescan_leaf obviously produces 2 — confirm against
	 * btrfs_search_slot_for_read's return values.
	 */
	if (err == 2 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (err >= 0) {
		pr_info("btrfs: qgroup scan completed%s\n",
			err == 2 ? " (inconsistency flag cleared)" : "");
	} else {
		pr_err("btrfs: qgroup scan failed with %d\n", err);
	}

	/* wake btrfs_qgroup_wait_for_completion() callers */
	complete_all(&fs_info->qgroup_rescan_completion);
}
2061
2062/*
2063 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2064 * memory required for the rescan context.
2065 */
2066static int
2067qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2068                   int init_flags)
2069{
2070        int ret = 0;
2071
2072        if (!init_flags &&
2073            (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2074             !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2075                ret = -EINVAL;
2076                goto err;
2077        }
2078
2079        mutex_lock(&fs_info->qgroup_rescan_lock);
2080        spin_lock(&fs_info->qgroup_lock);
2081
2082        if (init_flags) {
2083                if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2084                        ret = -EINPROGRESS;
2085                else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2086                        ret = -EINVAL;
2087
2088                if (ret) {
2089                        spin_unlock(&fs_info->qgroup_lock);
2090                        mutex_unlock(&fs_info->qgroup_rescan_lock);
2091                        goto err;
2092                }
2093
2094                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2095        }
2096
2097        memset(&fs_info->qgroup_rescan_progress, 0,
2098                sizeof(fs_info->qgroup_rescan_progress));
2099        fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2100
2101        spin_unlock(&fs_info->qgroup_lock);
2102        mutex_unlock(&fs_info->qgroup_rescan_lock);
2103
2104        init_completion(&fs_info->qgroup_rescan_completion);
2105
2106        memset(&fs_info->qgroup_rescan_work, 0,
2107               sizeof(fs_info->qgroup_rescan_work));
2108        fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker;
2109
2110        if (ret) {
2111err:
2112                pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret);
2113                return ret;
2114        }
2115
2116        return 0;
2117}
2118
2119static void
2120qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2121{
2122        struct rb_node *n;
2123        struct btrfs_qgroup *qgroup;
2124
2125        spin_lock(&fs_info->qgroup_lock);
2126        /* clear all current qgroup tracking information */
2127        for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2128                qgroup = rb_entry(n, struct btrfs_qgroup, node);
2129                qgroup->rfer = 0;
2130                qgroup->rfer_cmpr = 0;
2131                qgroup->excl = 0;
2132                qgroup->excl_cmpr = 0;
2133        }
2134        spin_unlock(&fs_info->qgroup_lock);
2135}
2136
2137int
2138btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2139{
2140        int ret = 0;
2141        struct btrfs_trans_handle *trans;
2142
2143        ret = qgroup_rescan_init(fs_info, 0, 1);
2144        if (ret)
2145                return ret;
2146
2147        /*
2148         * We have set the rescan_progress to 0, which means no more
2149         * delayed refs will be accounted by btrfs_qgroup_account_ref.
2150         * However, btrfs_qgroup_account_ref may be right after its call
2151         * to btrfs_find_all_roots, in which case it would still do the
2152         * accounting.
2153         * To solve this, we're committing the transaction, which will
2154         * ensure we run all delayed refs and only after that, we are
2155         * going to clear all tracking information for a clean start.
2156         */
2157
2158        trans = btrfs_join_transaction(fs_info->fs_root);
2159        if (IS_ERR(trans)) {
2160                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2161                return PTR_ERR(trans);
2162        }
2163        ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2164        if (ret) {
2165                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2166                return ret;
2167        }
2168
2169        qgroup_rescan_zero_tracking(fs_info);
2170
2171        btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
2172                           &fs_info->qgroup_rescan_work);
2173
2174        return 0;
2175}
2176
2177int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2178{
2179        int running;
2180        int ret = 0;
2181
2182        mutex_lock(&fs_info->qgroup_rescan_lock);
2183        spin_lock(&fs_info->qgroup_lock);
2184        running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2185        spin_unlock(&fs_info->qgroup_lock);
2186        mutex_unlock(&fs_info->qgroup_rescan_lock);
2187
2188        if (running)
2189                ret = wait_for_completion_interruptible(
2190                                        &fs_info->qgroup_rescan_completion);
2191
2192        return ret;
2193}
2194
2195/*
2196 * this is only called from open_ctree where we're still single threaded, thus
2197 * locking is omitted here.
2198 */
2199void
2200btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2201{
2202        if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2203                btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
2204                                   &fs_info->qgroup_rescan_work);
2205}
2206