linux/fs/ocfs2/xattr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* -*- mode: c; c-basic-offset: 8; -*-
   3 * vim: noexpandtab sw=8 ts=8 sts=0:
   4 *
   5 * xattr.c
   6 *
   7 * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
   8 *
   9 * CREDITS:
  10 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
  11 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  12 */
  13
  14#include <linux/capability.h>
  15#include <linux/fs.h>
  16#include <linux/types.h>
  17#include <linux/slab.h>
  18#include <linux/highmem.h>
  19#include <linux/pagemap.h>
  20#include <linux/uio.h>
  21#include <linux/sched.h>
  22#include <linux/splice.h>
  23#include <linux/mount.h>
  24#include <linux/writeback.h>
  25#include <linux/falloc.h>
  26#include <linux/sort.h>
  27#include <linux/init.h>
  28#include <linux/module.h>
  29#include <linux/string.h>
  30#include <linux/security.h>
  31
  32#include <cluster/masklog.h>
  33
  34#include "ocfs2.h"
  35#include "alloc.h"
  36#include "blockcheck.h"
  37#include "dlmglue.h"
  38#include "file.h"
  39#include "symlink.h"
  40#include "sysfile.h"
  41#include "inode.h"
  42#include "journal.h"
  43#include "ocfs2_fs.h"
  44#include "suballoc.h"
  45#include "uptodate.h"
  46#include "buffer_head_io.h"
  47#include "super.h"
  48#include "xattr.h"
  49#include "refcounttree.h"
  50#include "acl.h"
  51#include "ocfs2_trace.h"
  52
  53struct ocfs2_xattr_def_value_root {
  54        struct ocfs2_xattr_value_root   xv;
  55        struct ocfs2_extent_rec         er;
  56};
  57
  58struct ocfs2_xattr_bucket {
  59        /* The inode these xattrs are associated with */
  60        struct inode *bu_inode;
  61
  62        /* The actual buffers that make up the bucket */
  63        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
  64
  65        /* How many blocks make up one bucket for this filesystem */
  66        int bu_blocks;
  67};
  68
  69struct ocfs2_xattr_set_ctxt {
  70        handle_t *handle;
  71        struct ocfs2_alloc_context *meta_ac;
  72        struct ocfs2_alloc_context *data_ac;
  73        struct ocfs2_cached_dealloc_ctxt dealloc;
  74        int set_abort;
  75};
  76
  77#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
  78#define OCFS2_XATTR_INLINE_SIZE 80
  79#define OCFS2_XATTR_HEADER_GAP  4
  80#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
  81                                         - sizeof(struct ocfs2_xattr_header) \
  82                                         - OCFS2_XATTR_HEADER_GAP)
  83#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
  84                                         - sizeof(struct ocfs2_xattr_block) \
  85                                         - sizeof(struct ocfs2_xattr_header) \
  86                                         - OCFS2_XATTR_HEADER_GAP)
  87
  88static struct ocfs2_xattr_def_value_root def_xv = {
  89        .xv.xr_list.l_count = cpu_to_le16(1),
  90};
  91
  92const struct xattr_handler *ocfs2_xattr_handlers[] = {
  93        &ocfs2_xattr_user_handler,
  94        &posix_acl_access_xattr_handler,
  95        &posix_acl_default_xattr_handler,
  96        &ocfs2_xattr_trusted_handler,
  97        &ocfs2_xattr_security_handler,
  98        NULL
  99};
 100
 101static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
 102        [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
 103        [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
 104                                        = &posix_acl_access_xattr_handler,
 105        [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
 106                                        = &posix_acl_default_xattr_handler,
 107        [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
 108        [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
 109};
 110
 111struct ocfs2_xattr_info {
 112        int             xi_name_index;
 113        const char      *xi_name;
 114        int             xi_name_len;
 115        const void      *xi_value;
 116        size_t          xi_value_len;
 117};
 118
 119struct ocfs2_xattr_search {
 120        struct buffer_head *inode_bh;
 121        /*
 122         * xattr_bh point to the block buffer head which has extended attribute
 123         * when extended attribute in inode, xattr_bh is equal to inode_bh.
 124         */
 125        struct buffer_head *xattr_bh;
 126        struct ocfs2_xattr_header *header;
 127        struct ocfs2_xattr_bucket *bucket;
 128        void *base;
 129        void *end;
 130        struct ocfs2_xattr_entry *here;
 131        int not_found;
 132};
 133
 134/* Operations on struct ocfs2_xa_entry */
 135struct ocfs2_xa_loc;
 136struct ocfs2_xa_loc_operations {
 137        /*
 138         * Journal functions
 139         */
 140        int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
 141                                  int type);
 142        void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);
 143
 144        /*
 145         * Return a pointer to the appropriate buffer in loc->xl_storage
 146         * at the given offset from loc->xl_header.
 147         */
 148        void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
 149
 150        /* Can we reuse the existing entry for the new value? */
 151        int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
 152                             struct ocfs2_xattr_info *xi);
 153
 154        /* How much space is needed for the new value? */
 155        int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
 156                               struct ocfs2_xattr_info *xi);
 157
 158        /*
 159         * Return the offset of the first name+value pair.  This is
 160         * the start of our downward-filling free space.
 161         */
 162        int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
 163
 164        /*
 165         * Remove the name+value at this location.  Do whatever is
 166         * appropriate with the remaining name+value pairs.
 167         */
 168        void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
 169
 170        /* Fill xl_entry with a new entry */
 171        void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
 172
 173        /* Add name+value storage to an entry */
 174        void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
 175
 176        /*
 177         * Initialize the value buf's access and bh fields for this entry.
 178         * ocfs2_xa_fill_value_buf() will handle the xv pointer.
 179         */
 180        void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
 181                                   struct ocfs2_xattr_value_buf *vb);
 182};
 183
 184/*
 185 * Describes an xattr entry location.  This is a memory structure
 186 * tracking the on-disk structure.
 187 */
 188struct ocfs2_xa_loc {
 189        /* This xattr belongs to this inode */
 190        struct inode *xl_inode;
 191
 192        /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
 193        struct ocfs2_xattr_header *xl_header;
 194
 195        /* Bytes from xl_header to the end of the storage */
 196        int xl_size;
 197
 198        /*
 199         * The ocfs2_xattr_entry this location describes.  If this is
 200         * NULL, this location describes the on-disk structure where it
 201         * would have been.
 202         */
 203        struct ocfs2_xattr_entry *xl_entry;
 204
 205        /*
 206         * Internal housekeeping
 207         */
 208
 209        /* Buffer(s) containing this entry */
 210        void *xl_storage;
 211
 212        /* Operations on the storage backing this location */
 213        const struct ocfs2_xa_loc_operations *xl_ops;
 214};
 215
 216/*
 217 * Convenience functions to calculate how much space is needed for a
 218 * given name+value pair
 219 */
 220static int namevalue_size(int name_len, uint64_t value_len)
 221{
 222        if (value_len > OCFS2_XATTR_INLINE_SIZE)
 223                return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 224        else
 225                return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
 226}
 227
 228static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
 229{
 230        return namevalue_size(xi->xi_name_len, xi->xi_value_len);
 231}
 232
 233static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
 234{
 235        u64 value_len = le64_to_cpu(xe->xe_value_size);
 236
 237        BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
 238               ocfs2_xattr_is_local(xe));
 239        return namevalue_size(xe->xe_name_len, value_len);
 240}
 241
 242
 243static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
 244                                             struct ocfs2_xattr_header *xh,
 245                                             int index,
 246                                             int *block_off,
 247                                             int *new_offset);
 248
 249static int ocfs2_xattr_block_find(struct inode *inode,
 250                                  int name_index,
 251                                  const char *name,
 252                                  struct ocfs2_xattr_search *xs);
 253static int ocfs2_xattr_index_block_find(struct inode *inode,
 254                                        struct buffer_head *root_bh,
 255                                        int name_index,
 256                                        const char *name,
 257                                        struct ocfs2_xattr_search *xs);
 258
 259static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 260                                        struct buffer_head *blk_bh,
 261                                        char *buffer,
 262                                        size_t buffer_size);
 263
 264static int ocfs2_xattr_create_index_block(struct inode *inode,
 265                                          struct ocfs2_xattr_search *xs,
 266                                          struct ocfs2_xattr_set_ctxt *ctxt);
 267
 268static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 269                                             struct ocfs2_xattr_info *xi,
 270                                             struct ocfs2_xattr_search *xs,
 271                                             struct ocfs2_xattr_set_ctxt *ctxt);
 272
 273typedef int (xattr_tree_rec_func)(struct inode *inode,
 274                                  struct buffer_head *root_bh,
 275                                  u64 blkno, u32 cpos, u32 len, void *para);
 276static int ocfs2_iterate_xattr_index_block(struct inode *inode,
 277                                           struct buffer_head *root_bh,
 278                                           xattr_tree_rec_func *rec_func,
 279                                           void *para);
 280static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 281                                        struct ocfs2_xattr_bucket *bucket,
 282                                        void *para);
 283static int ocfs2_rm_xattr_cluster(struct inode *inode,
 284                                  struct buffer_head *root_bh,
 285                                  u64 blkno,
 286                                  u32 cpos,
 287                                  u32 len,
 288                                  void *para);
 289
 290static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 291                                  u64 src_blk, u64 last_blk, u64 to_blk,
 292                                  unsigned int start_bucket,
 293                                  u32 *first_hash);
 294static int ocfs2_prepare_refcount_xattr(struct inode *inode,
 295                                        struct ocfs2_dinode *di,
 296                                        struct ocfs2_xattr_info *xi,
 297                                        struct ocfs2_xattr_search *xis,
 298                                        struct ocfs2_xattr_search *xbs,
 299                                        struct ocfs2_refcount_tree **ref_tree,
 300                                        int *meta_need,
 301                                        int *credits);
 302static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
 303                                           struct ocfs2_xattr_bucket *bucket,
 304                                           int offset,
 305                                           struct ocfs2_xattr_value_root **xv,
 306                                           struct buffer_head **bh);
 307
 308static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 309{
 310        return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
 311}
 312
 313static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
 314{
 315        return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
 316}
 317
 318#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
 319#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
 320#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 321
 322static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
 323{
 324        struct ocfs2_xattr_bucket *bucket;
 325        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 326
 327        BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
 328
 329        bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
 330        if (bucket) {
 331                bucket->bu_inode = inode;
 332                bucket->bu_blocks = blks;
 333        }
 334
 335        return bucket;
 336}
 337
 338static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
 339{
 340        int i;
 341
 342        for (i = 0; i < bucket->bu_blocks; i++) {
 343                brelse(bucket->bu_bhs[i]);
 344                bucket->bu_bhs[i] = NULL;
 345        }
 346}
 347
 348static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
 349{
 350        if (bucket) {
 351                ocfs2_xattr_bucket_relse(bucket);
 352                bucket->bu_inode = NULL;
 353                kfree(bucket);
 354        }
 355}
 356
 357/*
 358 * A bucket that has never been written to disk doesn't need to be
 359 * read.  We just need the buffer_heads.  Don't call this for
 360 * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
 361 * them fully.
 362 */
 363static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 364                                   u64 xb_blkno, int new)
 365{
 366        int i, rc = 0;
 367
 368        for (i = 0; i < bucket->bu_blocks; i++) {
 369                bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
 370                                              xb_blkno + i);
 371                if (!bucket->bu_bhs[i]) {
 372                        rc = -ENOMEM;
 373                        mlog_errno(rc);
 374                        break;
 375                }
 376
 377                if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 378                                           bucket->bu_bhs[i])) {
 379                        if (new)
 380                                ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 381                                                              bucket->bu_bhs[i]);
 382                        else {
 383                                set_buffer_uptodate(bucket->bu_bhs[i]);
 384                                ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 385                                                          bucket->bu_bhs[i]);
 386                        }
 387                }
 388        }
 389
 390        if (rc)
 391                ocfs2_xattr_bucket_relse(bucket);
 392        return rc;
 393}
 394
 395/* Read the xattr bucket at xb_blkno */
 396static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 397                                   u64 xb_blkno)
 398{
 399        int rc;
 400
 401        rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
 402                               bucket->bu_blocks, bucket->bu_bhs, 0,
 403                               NULL);
 404        if (!rc) {
 405                spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 406                rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
 407                                                 bucket->bu_bhs,
 408                                                 bucket->bu_blocks,
 409                                                 &bucket_xh(bucket)->xh_check);
 410                spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 411                if (rc)
 412                        mlog_errno(rc);
 413        }
 414
 415        if (rc)
 416                ocfs2_xattr_bucket_relse(bucket);
 417        return rc;
 418}
 419
 420static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 421                                             struct ocfs2_xattr_bucket *bucket,
 422                                             int type)
 423{
 424        int i, rc = 0;
 425
 426        for (i = 0; i < bucket->bu_blocks; i++) {
 427                rc = ocfs2_journal_access(handle,
 428                                          INODE_CACHE(bucket->bu_inode),
 429                                          bucket->bu_bhs[i], type);
 430                if (rc) {
 431                        mlog_errno(rc);
 432                        break;
 433                }
 434        }
 435
 436        return rc;
 437}
 438
 439static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 440                                             struct ocfs2_xattr_bucket *bucket)
 441{
 442        int i;
 443
 444        spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 445        ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
 446                                   bucket->bu_bhs, bucket->bu_blocks,
 447                                   &bucket_xh(bucket)->xh_check);
 448        spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 449
 450        for (i = 0; i < bucket->bu_blocks; i++)
 451                ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 452}
 453
 454static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
 455                                         struct ocfs2_xattr_bucket *src)
 456{
 457        int i;
 458        int blocksize = src->bu_inode->i_sb->s_blocksize;
 459
 460        BUG_ON(dest->bu_blocks != src->bu_blocks);
 461        BUG_ON(dest->bu_inode != src->bu_inode);
 462
 463        for (i = 0; i < src->bu_blocks; i++) {
 464                memcpy(bucket_block(dest, i), bucket_block(src, i),
 465                       blocksize);
 466        }
 467}
 468
 469static int ocfs2_validate_xattr_block(struct super_block *sb,
 470                                      struct buffer_head *bh)
 471{
 472        int rc;
 473        struct ocfs2_xattr_block *xb =
 474                (struct ocfs2_xattr_block *)bh->b_data;
 475
 476        trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
 477
 478        BUG_ON(!buffer_uptodate(bh));
 479
 480        /*
 481         * If the ecc fails, we return the error but otherwise
 482         * leave the filesystem running.  We know any error is
 483         * local to this block.
 484         */
 485        rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
 486        if (rc)
 487                return rc;
 488
 489        /*
 490         * Errors after here are fatal
 491         */
 492
 493        if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
 494                return ocfs2_error(sb,
 495                                   "Extended attribute block #%llu has bad signature %.*s\n",
 496                                   (unsigned long long)bh->b_blocknr, 7,
 497                                   xb->xb_signature);
 498        }
 499
 500        if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
 501                return ocfs2_error(sb,
 502                                   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
 503                                   (unsigned long long)bh->b_blocknr,
 504                                   (unsigned long long)le64_to_cpu(xb->xb_blkno));
 505        }
 506
 507        if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
 508                return ocfs2_error(sb,
 509                                   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
 510                                   (unsigned long long)bh->b_blocknr,
 511                                   le32_to_cpu(xb->xb_fs_generation));
 512        }
 513
 514        return 0;
 515}
 516
 517static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
 518                                  struct buffer_head **bh)
 519{
 520        int rc;
 521        struct buffer_head *tmp = *bh;
 522
 523        rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
 524                              ocfs2_validate_xattr_block);
 525
 526        /* If ocfs2_read_block() got us a new bh, pass it up. */
 527        if (!rc && !*bh)
 528                *bh = tmp;
 529
 530        return rc;
 531}
 532
 533static inline const char *ocfs2_xattr_prefix(int name_index)
 534{
 535        const struct xattr_handler *handler = NULL;
 536
 537        if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
 538                handler = ocfs2_xattr_handler_map[name_index];
 539        return handler ? xattr_prefix(handler) : NULL;
 540}
 541
 542static u32 ocfs2_xattr_name_hash(struct inode *inode,
 543                                 const char *name,
 544                                 int name_len)
 545{
 546        /* Get hash value of uuid from super block */
 547        u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
 548        int i;
 549
 550        /* hash extended attribute name */
 551        for (i = 0; i < name_len; i++) {
 552                hash = (hash << OCFS2_HASH_SHIFT) ^
 553                       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
 554                       *name++;
 555        }
 556
 557        return hash;
 558}
 559
 560static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
 561{
 562        return namevalue_size(name_len, value_len) +
 563                sizeof(struct ocfs2_xattr_entry);
 564}
 565
 566static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
 567{
 568        return namevalue_size_xi(xi) +
 569                sizeof(struct ocfs2_xattr_entry);
 570}
 571
 572static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
 573{
 574        return namevalue_size_xe(xe) +
 575                sizeof(struct ocfs2_xattr_entry);
 576}
 577
 578int ocfs2_calc_security_init(struct inode *dir,
 579                             struct ocfs2_security_xattr_info *si,
 580                             int *want_clusters,
 581                             int *xattr_credits,
 582                             struct ocfs2_alloc_context **xattr_ac)
 583{
 584        int ret = 0;
 585        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 586        int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 587                                                 si->value_len);
 588
 589        /*
 590         * The max space of security xattr taken inline is
 591         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 592         * So reserve one metadata block for it is ok.
 593         */
 594        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 595            s_size > OCFS2_XATTR_FREE_IN_IBODY) {
 596                ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
 597                if (ret) {
 598                        mlog_errno(ret);
 599                        return ret;
 600                }
 601                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 602        }
 603
 604        /* reserve clusters for xattr value which will be set in B tree*/
 605        if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 606                int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 607                                                            si->value_len);
 608
 609                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 610                                                           new_clusters);
 611                *want_clusters += new_clusters;
 612        }
 613        return ret;
 614}
 615
 616int ocfs2_calc_xattr_init(struct inode *dir,
 617                          struct buffer_head *dir_bh,
 618                          umode_t mode,
 619                          struct ocfs2_security_xattr_info *si,
 620                          int *want_clusters,
 621                          int *xattr_credits,
 622                          int *want_meta)
 623{
 624        int ret = 0;
 625        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 626        int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
 627
 628        if (si->enable)
 629                s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 630                                                     si->value_len);
 631
 632        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
 633                down_read(&OCFS2_I(dir)->ip_xattr_sem);
 634                acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
 635                                        OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
 636                                        "", NULL, 0);
 637                up_read(&OCFS2_I(dir)->ip_xattr_sem);
 638                if (acl_len > 0) {
 639                        a_size = ocfs2_xattr_entry_real_size(0, acl_len);
 640                        if (S_ISDIR(mode))
 641                                a_size <<= 1;
 642                } else if (acl_len != 0 && acl_len != -ENODATA) {
 643                        ret = acl_len;
 644                        mlog_errno(ret);
 645                        return ret;
 646                }
 647        }
 648
 649        if (!(s_size + a_size))
 650                return ret;
 651
 652        /*
 653         * The max space of security xattr taken inline is
 654         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 655         * The max space of acl xattr taken inline is
 656         * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
 657         * when blocksize = 512, may reserve one more cluser for
 658         * xattr bucket, otherwise reserve one metadata block
 659         * for them is ok.
 660         * If this is a new directory with inline data,
 661         * we choose to reserve the entire inline area for
 662         * directory contents and force an external xattr block.
 663         */
 664        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 665            (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 666            (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
 667                *want_meta = *want_meta + 1;
 668                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 669        }
 670
 671        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
 672            (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
 673                *want_clusters += 1;
 674                *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
 675        }
 676
 677        /*
 678         * reserve credits and clusters for xattrs which has large value
 679         * and have to be set outside
 680         */
 681        if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 682                new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 683                                                        si->value_len);
 684                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 685                                                           new_clusters);
 686                *want_clusters += new_clusters;
 687        }
 688        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
 689            acl_len > OCFS2_XATTR_INLINE_SIZE) {
 690                /* for directory, it has DEFAULT and ACCESS two types of acls */
 691                new_clusters = (S_ISDIR(mode) ? 2 : 1) *
 692                                ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
 693                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 694                                                           new_clusters);
 695                *want_clusters += new_clusters;
 696        }
 697
 698        return ret;
 699}
 700
 701static int ocfs2_xattr_extend_allocation(struct inode *inode,
 702                                         u32 clusters_to_add,
 703                                         struct ocfs2_xattr_value_buf *vb,
 704                                         struct ocfs2_xattr_set_ctxt *ctxt)
 705{
 706        int status = 0, credits;
 707        handle_t *handle = ctxt->handle;
 708        enum ocfs2_alloc_restarted why;
 709        u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
 710        struct ocfs2_extent_tree et;
 711
 712        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 713
 714        while (clusters_to_add) {
 715                trace_ocfs2_xattr_extend_allocation(clusters_to_add);
 716
 717                status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 718                                       OCFS2_JOURNAL_ACCESS_WRITE);
 719                if (status < 0) {
 720                        mlog_errno(status);
 721                        break;
 722                }
 723
 724                prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 725                status = ocfs2_add_clusters_in_btree(handle,
 726                                                     &et,
 727                                                     &logical_start,
 728                                                     clusters_to_add,
 729                                                     0,
 730                                                     ctxt->data_ac,
 731                                                     ctxt->meta_ac,
 732                                                     &why);
 733                if ((status < 0) && (status != -EAGAIN)) {
 734                        if (status != -ENOSPC)
 735                                mlog_errno(status);
 736                        break;
 737                }
 738
 739                ocfs2_journal_dirty(handle, vb->vb_bh);
 740
 741                clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
 742                                         prev_clusters;
 743
 744                if (why != RESTART_NONE && clusters_to_add) {
 745                        /*
 746                         * We can only fail in case the alloc file doesn't give
 747                         * up enough clusters.
 748                         */
 749                        BUG_ON(why == RESTART_META);
 750
 751                        credits = ocfs2_calc_extend_credits(inode->i_sb,
 752                                                            &vb->vb_xv->xr_list);
 753                        status = ocfs2_extend_trans(handle, credits);
 754                        if (status < 0) {
 755                                status = -ENOMEM;
 756                                mlog_errno(status);
 757                                break;
 758                        }
 759                }
 760        }
 761
 762        return status;
 763}
 764
 765static int __ocfs2_remove_xattr_range(struct inode *inode,
 766                                      struct ocfs2_xattr_value_buf *vb,
 767                                      u32 cpos, u32 phys_cpos, u32 len,
 768                                      unsigned int ext_flags,
 769                                      struct ocfs2_xattr_set_ctxt *ctxt)
 770{
 771        int ret;
 772        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 773        handle_t *handle = ctxt->handle;
 774        struct ocfs2_extent_tree et;
 775
 776        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 777
 778        ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 779                            OCFS2_JOURNAL_ACCESS_WRITE);
 780        if (ret) {
 781                mlog_errno(ret);
 782                goto out;
 783        }
 784
 785        ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
 786                                  &ctxt->dealloc);
 787        if (ret) {
 788                mlog_errno(ret);
 789                goto out;
 790        }
 791
 792        le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
 793        ocfs2_journal_dirty(handle, vb->vb_bh);
 794
 795        if (ext_flags & OCFS2_EXT_REFCOUNTED)
 796                ret = ocfs2_decrease_refcount(inode, handle,
 797                                        ocfs2_blocks_to_clusters(inode->i_sb,
 798                                                                 phys_blkno),
 799                                        len, ctxt->meta_ac, &ctxt->dealloc, 1);
 800        else
 801                ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
 802                                                  phys_blkno, len);
 803        if (ret)
 804                mlog_errno(ret);
 805
 806out:
 807        return ret;
 808}
 809
 810static int ocfs2_xattr_shrink_size(struct inode *inode,
 811                                   u32 old_clusters,
 812                                   u32 new_clusters,
 813                                   struct ocfs2_xattr_value_buf *vb,
 814                                   struct ocfs2_xattr_set_ctxt *ctxt)
 815{
 816        int ret = 0;
 817        unsigned int ext_flags;
 818        u32 trunc_len, cpos, phys_cpos, alloc_size;
 819        u64 block;
 820
 821        if (old_clusters <= new_clusters)
 822                return 0;
 823
 824        cpos = new_clusters;
 825        trunc_len = old_clusters - new_clusters;
 826        while (trunc_len) {
 827                ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 828                                               &alloc_size,
 829                                               &vb->vb_xv->xr_list, &ext_flags);
 830                if (ret) {
 831                        mlog_errno(ret);
 832                        goto out;
 833                }
 834
 835                if (alloc_size > trunc_len)
 836                        alloc_size = trunc_len;
 837
 838                ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
 839                                                 phys_cpos, alloc_size,
 840                                                 ext_flags, ctxt);
 841                if (ret) {
 842                        mlog_errno(ret);
 843                        goto out;
 844                }
 845
 846                block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 847                ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
 848                                                       block, alloc_size);
 849                cpos += alloc_size;
 850                trunc_len -= alloc_size;
 851        }
 852
 853out:
 854        return ret;
 855}
 856
 857static int ocfs2_xattr_value_truncate(struct inode *inode,
 858                                      struct ocfs2_xattr_value_buf *vb,
 859                                      int len,
 860                                      struct ocfs2_xattr_set_ctxt *ctxt)
 861{
 862        int ret;
 863        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
 864        u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 865
 866        if (new_clusters == old_clusters)
 867                return 0;
 868
 869        if (new_clusters > old_clusters)
 870                ret = ocfs2_xattr_extend_allocation(inode,
 871                                                    new_clusters - old_clusters,
 872                                                    vb, ctxt);
 873        else
 874                ret = ocfs2_xattr_shrink_size(inode,
 875                                              old_clusters, new_clusters,
 876                                              vb, ctxt);
 877
 878        return ret;
 879}
 880
 881static int ocfs2_xattr_list_entry(struct super_block *sb,
 882                                  char *buffer, size_t size,
 883                                  size_t *result, int type,
 884                                  const char *name, int name_len)
 885{
 886        char *p = buffer + *result;
 887        const char *prefix;
 888        int prefix_len;
 889        int total_len;
 890
 891        switch(type) {
 892        case OCFS2_XATTR_INDEX_USER:
 893                if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 894                        return 0;
 895                break;
 896
 897        case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
 898        case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
 899                if (!(sb->s_flags & SB_POSIXACL))
 900                        return 0;
 901                break;
 902
 903        case OCFS2_XATTR_INDEX_TRUSTED:
 904                if (!capable(CAP_SYS_ADMIN))
 905                        return 0;
 906                break;
 907        }
 908
 909        prefix = ocfs2_xattr_prefix(type);
 910        if (!prefix)
 911                return 0;
 912        prefix_len = strlen(prefix);
 913        total_len = prefix_len + name_len + 1;
 914        *result += total_len;
 915
 916        /* we are just looking for how big our buffer needs to be */
 917        if (!size)
 918                return 0;
 919
 920        if (*result > size)
 921                return -ERANGE;
 922
 923        memcpy(p, prefix, prefix_len);
 924        memcpy(p + prefix_len, name, name_len);
 925        p[prefix_len + name_len] = '\0';
 926
 927        return 0;
 928}
 929
 930static int ocfs2_xattr_list_entries(struct inode *inode,
 931                                    struct ocfs2_xattr_header *header,
 932                                    char *buffer, size_t buffer_size)
 933{
 934        size_t result = 0;
 935        int i, type, ret;
 936        const char *name;
 937
 938        for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
 939                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 940                type = ocfs2_xattr_get_type(entry);
 941                name = (const char *)header +
 942                        le16_to_cpu(entry->xe_name_offset);
 943
 944                ret = ocfs2_xattr_list_entry(inode->i_sb,
 945                                             buffer, buffer_size,
 946                                             &result, type, name,
 947                                             entry->xe_name_len);
 948                if (ret)
 949                        return ret;
 950        }
 951
 952        return result;
 953}
 954
 955int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
 956                                         struct ocfs2_dinode *di)
 957{
 958        struct ocfs2_xattr_header *xh;
 959        int i;
 960
 961        xh = (struct ocfs2_xattr_header *)
 962                 ((void *)di + inode->i_sb->s_blocksize -
 963                 le16_to_cpu(di->i_xattr_inline_size));
 964
 965        for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
 966                if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
 967                        return 1;
 968
 969        return 0;
 970}
 971
 972static int ocfs2_xattr_ibody_list(struct inode *inode,
 973                                  struct ocfs2_dinode *di,
 974                                  char *buffer,
 975                                  size_t buffer_size)
 976{
 977        struct ocfs2_xattr_header *header = NULL;
 978        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 979        int ret = 0;
 980
 981        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
 982                return ret;
 983
 984        header = (struct ocfs2_xattr_header *)
 985                 ((void *)di + inode->i_sb->s_blocksize -
 986                 le16_to_cpu(di->i_xattr_inline_size));
 987
 988        ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
 989
 990        return ret;
 991}
 992
 993static int ocfs2_xattr_block_list(struct inode *inode,
 994                                  struct ocfs2_dinode *di,
 995                                  char *buffer,
 996                                  size_t buffer_size)
 997{
 998        struct buffer_head *blk_bh = NULL;
 999        struct ocfs2_xattr_block *xb;
1000        int ret = 0;
1001
1002        if (!di->i_xattr_loc)
1003                return ret;
1004
1005        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
1006                                     &blk_bh);
1007        if (ret < 0) {
1008                mlog_errno(ret);
1009                return ret;
1010        }
1011
1012        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1013        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1014                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
1015                ret = ocfs2_xattr_list_entries(inode, header,
1016                                               buffer, buffer_size);
1017        } else
1018                ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1019                                                   buffer, buffer_size);
1020
1021        brelse(blk_bh);
1022
1023        return ret;
1024}
1025
1026ssize_t ocfs2_listxattr(struct dentry *dentry,
1027                        char *buffer,
1028                        size_t size)
1029{
1030        int ret = 0, i_ret = 0, b_ret = 0;
1031        struct buffer_head *di_bh = NULL;
1032        struct ocfs2_dinode *di = NULL;
1033        struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
1034
1035        if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1036                return -EOPNOTSUPP;
1037
1038        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1039                return ret;
1040
1041        ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
1042        if (ret < 0) {
1043                mlog_errno(ret);
1044                return ret;
1045        }
1046
1047        di = (struct ocfs2_dinode *)di_bh->b_data;
1048
1049        down_read(&oi->ip_xattr_sem);
1050        i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
1051        if (i_ret < 0)
1052                b_ret = 0;
1053        else {
1054                if (buffer) {
1055                        buffer += i_ret;
1056                        size -= i_ret;
1057                }
1058                b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
1059                                               buffer, size);
1060                if (b_ret < 0)
1061                        i_ret = 0;
1062        }
1063        up_read(&oi->ip_xattr_sem);
1064        ocfs2_inode_unlock(d_inode(dentry), 0);
1065
1066        brelse(di_bh);
1067
1068        return i_ret + b_ret;
1069}
1070
1071static int ocfs2_xattr_find_entry(int name_index,
1072                                  const char *name,
1073                                  struct ocfs2_xattr_search *xs)
1074{
1075        struct ocfs2_xattr_entry *entry;
1076        size_t name_len;
1077        int i, cmp = 1;
1078
1079        if (name == NULL)
1080                return -EINVAL;
1081
1082        name_len = strlen(name);
1083        entry = xs->here;
1084        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1085                cmp = name_index - ocfs2_xattr_get_type(entry);
1086                if (!cmp)
1087                        cmp = name_len - entry->xe_name_len;
1088                if (!cmp)
1089                        cmp = memcmp(name, (xs->base +
1090                                     le16_to_cpu(entry->xe_name_offset)),
1091                                     name_len);
1092                if (cmp == 0)
1093                        break;
1094                entry += 1;
1095        }
1096        xs->here = entry;
1097
1098        return cmp ? -ENODATA : 0;
1099}
1100
1101static int ocfs2_xattr_get_value_outside(struct inode *inode,
1102                                         struct ocfs2_xattr_value_root *xv,
1103                                         void *buffer,
1104                                         size_t len)
1105{
1106        u32 cpos, p_cluster, num_clusters, bpc, clusters;
1107        u64 blkno;
1108        int i, ret = 0;
1109        size_t cplen, blocksize;
1110        struct buffer_head *bh = NULL;
1111        struct ocfs2_extent_list *el;
1112
1113        el = &xv->xr_list;
1114        clusters = le32_to_cpu(xv->xr_clusters);
1115        bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1116        blocksize = inode->i_sb->s_blocksize;
1117
1118        cpos = 0;
1119        while (cpos < clusters) {
1120                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1121                                               &num_clusters, el, NULL);
1122                if (ret) {
1123                        mlog_errno(ret);
1124                        goto out;
1125                }
1126
1127                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1128                /* Copy ocfs2_xattr_value */
1129                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1130                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1131                                               &bh, NULL);
1132                        if (ret) {
1133                                mlog_errno(ret);
1134                                goto out;
1135                        }
1136
1137                        cplen = len >= blocksize ? blocksize : len;
1138                        memcpy(buffer, bh->b_data, cplen);
1139                        len -= cplen;
1140                        buffer += cplen;
1141
1142                        brelse(bh);
1143                        bh = NULL;
1144                        if (len == 0)
1145                                break;
1146                }
1147                cpos += num_clusters;
1148        }
1149out:
1150        return ret;
1151}
1152
1153static int ocfs2_xattr_ibody_get(struct inode *inode,
1154                                 int name_index,
1155                                 const char *name,
1156                                 void *buffer,
1157                                 size_t buffer_size,
1158                                 struct ocfs2_xattr_search *xs)
1159{
1160        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1161        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1162        struct ocfs2_xattr_value_root *xv;
1163        size_t size;
1164        int ret = 0;
1165
1166        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1167                return -ENODATA;
1168
1169        xs->end = (void *)di + inode->i_sb->s_blocksize;
1170        xs->header = (struct ocfs2_xattr_header *)
1171                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1172        xs->base = (void *)xs->header;
1173        xs->here = xs->header->xh_entries;
1174
1175        ret = ocfs2_xattr_find_entry(name_index, name, xs);
1176        if (ret)
1177                return ret;
1178        size = le64_to_cpu(xs->here->xe_value_size);
1179        if (buffer) {
1180                if (size > buffer_size)
1181                        return -ERANGE;
1182                if (ocfs2_xattr_is_local(xs->here)) {
1183                        memcpy(buffer, (void *)xs->base +
1184                               le16_to_cpu(xs->here->xe_name_offset) +
1185                               OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1186                } else {
1187                        xv = (struct ocfs2_xattr_value_root *)
1188                                (xs->base + le16_to_cpu(
1189                                 xs->here->xe_name_offset) +
1190                                OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1191                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1192                                                            buffer, size);
1193                        if (ret < 0) {
1194                                mlog_errno(ret);
1195                                return ret;
1196                        }
1197                }
1198        }
1199
1200        return size;
1201}
1202
1203static int ocfs2_xattr_block_get(struct inode *inode,
1204                                 int name_index,
1205                                 const char *name,
1206                                 void *buffer,
1207                                 size_t buffer_size,
1208                                 struct ocfs2_xattr_search *xs)
1209{
1210        struct ocfs2_xattr_block *xb;
1211        struct ocfs2_xattr_value_root *xv;
1212        size_t size;
1213        int ret = -ENODATA, name_offset, name_len, i;
1214        int uninitialized_var(block_off);
1215
1216        xs->bucket = ocfs2_xattr_bucket_new(inode);
1217        if (!xs->bucket) {
1218                ret = -ENOMEM;
1219                mlog_errno(ret);
1220                goto cleanup;
1221        }
1222
1223        ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1224        if (ret) {
1225                mlog_errno(ret);
1226                goto cleanup;
1227        }
1228
1229        if (xs->not_found) {
1230                ret = -ENODATA;
1231                goto cleanup;
1232        }
1233
1234        xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1235        size = le64_to_cpu(xs->here->xe_value_size);
1236        if (buffer) {
1237                ret = -ERANGE;
1238                if (size > buffer_size)
1239                        goto cleanup;
1240
1241                name_offset = le16_to_cpu(xs->here->xe_name_offset);
1242                name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1243                i = xs->here - xs->header->xh_entries;
1244
1245                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1246                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1247                                                                bucket_xh(xs->bucket),
1248                                                                i,
1249                                                                &block_off,
1250                                                                &name_offset);
1251                        if (ret) {
1252                                mlog_errno(ret);
1253                                goto cleanup;
1254                        }
1255                        xs->base = bucket_block(xs->bucket, block_off);
1256                }
1257                if (ocfs2_xattr_is_local(xs->here)) {
1258                        memcpy(buffer, (void *)xs->base +
1259                               name_offset + name_len, size);
1260                } else {
1261                        xv = (struct ocfs2_xattr_value_root *)
1262                                (xs->base + name_offset + name_len);
1263                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1264                                                            buffer, size);
1265                        if (ret < 0) {
1266                                mlog_errno(ret);
1267                                goto cleanup;
1268                        }
1269                }
1270        }
1271        ret = size;
1272cleanup:
1273        ocfs2_xattr_bucket_free(xs->bucket);
1274
1275        brelse(xs->xattr_bh);
1276        xs->xattr_bh = NULL;
1277        return ret;
1278}
1279
1280int ocfs2_xattr_get_nolock(struct inode *inode,
1281                           struct buffer_head *di_bh,
1282                           int name_index,
1283                           const char *name,
1284                           void *buffer,
1285                           size_t buffer_size)
1286{
1287        int ret;
1288        struct ocfs2_dinode *di = NULL;
1289        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1290        struct ocfs2_xattr_search xis = {
1291                .not_found = -ENODATA,
1292        };
1293        struct ocfs2_xattr_search xbs = {
1294                .not_found = -ENODATA,
1295        };
1296
1297        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1298                return -EOPNOTSUPP;
1299
1300        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1301                return -ENODATA;
1302
1303        xis.inode_bh = xbs.inode_bh = di_bh;
1304        di = (struct ocfs2_dinode *)di_bh->b_data;
1305
1306        ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1307                                    buffer_size, &xis);
1308        if (ret == -ENODATA && di->i_xattr_loc)
1309                ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1310                                            buffer_size, &xbs);
1311
1312        return ret;
1313}
1314
1315/* ocfs2_xattr_get()
1316 *
1317 * Copy an extended attribute into the buffer provided.
1318 * Buffer is NULL to compute the size of buffer required.
1319 */
1320static int ocfs2_xattr_get(struct inode *inode,
1321                           int name_index,
1322                           const char *name,
1323                           void *buffer,
1324                           size_t buffer_size)
1325{
1326        int ret, had_lock;
1327        struct buffer_head *di_bh = NULL;
1328        struct ocfs2_lock_holder oh;
1329
1330        had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
1331        if (had_lock < 0) {
1332                mlog_errno(had_lock);
1333                return had_lock;
1334        }
1335        down_read(&OCFS2_I(inode)->ip_xattr_sem);
1336        ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1337                                     name, buffer, buffer_size);
1338        up_read(&OCFS2_I(inode)->ip_xattr_sem);
1339
1340        ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
1341
1342        brelse(di_bh);
1343
1344        return ret;
1345}
1346
1347static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1348                                           handle_t *handle,
1349                                           struct ocfs2_xattr_value_buf *vb,
1350                                           const void *value,
1351                                           int value_len)
1352{
1353        int ret = 0, i, cp_len;
1354        u16 blocksize = inode->i_sb->s_blocksize;
1355        u32 p_cluster, num_clusters;
1356        u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1357        u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1358        u64 blkno;
1359        struct buffer_head *bh = NULL;
1360        unsigned int ext_flags;
1361        struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1362
1363        BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1364
1365        while (cpos < clusters) {
1366                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1367                                               &num_clusters, &xv->xr_list,
1368                                               &ext_flags);
1369                if (ret) {
1370                        mlog_errno(ret);
1371                        goto out;
1372                }
1373
1374                BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1375
1376                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1377
1378                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1379                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1380                                               &bh, NULL);
1381                        if (ret) {
1382                                mlog_errno(ret);
1383                                goto out;
1384                        }
1385
1386                        ret = ocfs2_journal_access(handle,
1387                                                   INODE_CACHE(inode),
1388                                                   bh,
1389                                                   OCFS2_JOURNAL_ACCESS_WRITE);
1390                        if (ret < 0) {
1391                                mlog_errno(ret);
1392                                goto out;
1393                        }
1394
1395                        cp_len = value_len > blocksize ? blocksize : value_len;
1396                        memcpy(bh->b_data, value, cp_len);
1397                        value_len -= cp_len;
1398                        value += cp_len;
1399                        if (cp_len < blocksize)
1400                                memset(bh->b_data + cp_len, 0,
1401                                       blocksize - cp_len);
1402
1403                        ocfs2_journal_dirty(handle, bh);
1404                        brelse(bh);
1405                        bh = NULL;
1406
1407                        /*
1408                         * XXX: do we need to empty all the following
1409                         * blocks in this cluster?
1410                         */
1411                        if (!value_len)
1412                                break;
1413                }
1414                cpos += num_clusters;
1415        }
1416out:
1417        brelse(bh);
1418
1419        return ret;
1420}
1421
1422static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1423                                       int num_entries)
1424{
1425        int free_space;
1426
1427        if (!needed_space)
1428                return 0;
1429
1430        free_space = free_start -
1431                sizeof(struct ocfs2_xattr_header) -
1432                (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1433                OCFS2_XATTR_HEADER_GAP;
1434        if (free_space < 0)
1435                return -EIO;
1436        if (free_space < needed_space)
1437                return -ENOSPC;
1438
1439        return 0;
1440}
1441
1442static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1443                                   int type)
1444{
1445        return loc->xl_ops->xlo_journal_access(handle, loc, type);
1446}
1447
1448static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1449{
1450        loc->xl_ops->xlo_journal_dirty(handle, loc);
1451}
1452
1453/* Give a pointer into the storage for the given offset */
1454static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1455{
1456        BUG_ON(offset >= loc->xl_size);
1457        return loc->xl_ops->xlo_offset_pointer(loc, offset);
1458}
1459
1460/*
1461 * Wipe the name+value pair and allow the storage to reclaim it.  This
1462 * must be followed by either removal of the entry or a call to
1463 * ocfs2_xa_add_namevalue().
1464 */
1465static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1466{
1467        loc->xl_ops->xlo_wipe_namevalue(loc);
1468}
1469
1470/*
1471 * Find lowest offset to a name+value pair.  This is the start of our
1472 * downward-growing free space.
1473 */
1474static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1475{
1476        return loc->xl_ops->xlo_get_free_start(loc);
1477}
1478
1479/* Can we reuse loc->xl_entry for xi? */
1480static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1481                                    struct ocfs2_xattr_info *xi)
1482{
1483        return loc->xl_ops->xlo_can_reuse(loc, xi);
1484}
1485
1486/* How much free space is needed to set the new value */
1487static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1488                                struct ocfs2_xattr_info *xi)
1489{
1490        return loc->xl_ops->xlo_check_space(loc, xi);
1491}
1492
1493static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1494{
1495        loc->xl_ops->xlo_add_entry(loc, name_hash);
1496        loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1497        /*
1498         * We can't leave the new entry's xe_name_offset at zero or
1499         * add_namevalue() will go nuts.  We set it to the size of our
1500         * storage so that it can never be less than any other entry.
1501         */
1502        loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1503}
1504
1505static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1506                                   struct ocfs2_xattr_info *xi)
1507{
1508        int size = namevalue_size_xi(xi);
1509        int nameval_offset;
1510        char *nameval_buf;
1511
1512        loc->xl_ops->xlo_add_namevalue(loc, size);
1513        loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1514        loc->xl_entry->xe_name_len = xi->xi_name_len;
1515        ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1516        ocfs2_xattr_set_local(loc->xl_entry,
1517                              xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1518
1519        nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1520        nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1521        memset(nameval_buf, 0, size);
1522        memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1523}
1524
1525static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1526                                    struct ocfs2_xattr_value_buf *vb)
1527{
1528        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1529        int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1530
1531        /* Value bufs are for value trees */
1532        BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1533        BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1534               (name_size + OCFS2_XATTR_ROOT_SIZE));
1535
1536        loc->xl_ops->xlo_fill_value_buf(loc, vb);
1537        vb->vb_xv =
1538                (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1539                                                        nameval_offset +
1540                                                        name_size);
1541}
1542
1543static int ocfs2_xa_block_journal_access(handle_t *handle,
1544                                         struct ocfs2_xa_loc *loc, int type)
1545{
1546        struct buffer_head *bh = loc->xl_storage;
1547        ocfs2_journal_access_func access;
1548
1549        if (loc->xl_size == (bh->b_size -
1550                             offsetof(struct ocfs2_xattr_block,
1551                                      xb_attrs.xb_header)))
1552                access = ocfs2_journal_access_xb;
1553        else
1554                access = ocfs2_journal_access_di;
1555        return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1556}
1557
1558static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1559                                         struct ocfs2_xa_loc *loc)
1560{
1561        struct buffer_head *bh = loc->xl_storage;
1562
1563        ocfs2_journal_dirty(handle, bh);
1564}
1565
1566static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1567                                           int offset)
1568{
1569        return (char *)loc->xl_header + offset;
1570}
1571
1572static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1573                                    struct ocfs2_xattr_info *xi)
1574{
1575        /*
1576         * Block storage is strict.  If the sizes aren't exact, we will
1577         * remove the old one and reinsert the new.
1578         */
1579        return namevalue_size_xe(loc->xl_entry) ==
1580                namevalue_size_xi(xi);
1581}
1582
1583static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1584{
1585        struct ocfs2_xattr_header *xh = loc->xl_header;
1586        int i, count = le16_to_cpu(xh->xh_count);
1587        int offset, free_start = loc->xl_size;
1588
1589        for (i = 0; i < count; i++) {
1590                offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1591                if (offset < free_start)
1592                        free_start = offset;
1593        }
1594
1595        return free_start;
1596}
1597
1598static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1599                                      struct ocfs2_xattr_info *xi)
1600{
1601        int count = le16_to_cpu(loc->xl_header->xh_count);
1602        int free_start = ocfs2_xa_get_free_start(loc);
1603        int needed_space = ocfs2_xi_entry_usage(xi);
1604
1605        /*
1606         * Block storage will reclaim the original entry before inserting
1607         * the new value, so we only need the difference.  If the new
1608         * entry is smaller than the old one, we don't need anything.
1609         */
1610        if (loc->xl_entry) {
1611                /* Don't need space if we're reusing! */
1612                if (ocfs2_xa_can_reuse_entry(loc, xi))
1613                        needed_space = 0;
1614                else
1615                        needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1616        }
1617        if (needed_space < 0)
1618                needed_space = 0;
1619        return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1620}
1621
1622/*
1623 * Block storage for xattrs keeps the name+value pairs compacted.  When
1624 * we remove one, we have to shift any that preceded it towards the end.
1625 */
1626static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1627{
1628        int i, offset;
1629        int namevalue_offset, first_namevalue_offset, namevalue_size;
1630        struct ocfs2_xattr_entry *entry = loc->xl_entry;
1631        struct ocfs2_xattr_header *xh = loc->xl_header;
1632        int count = le16_to_cpu(xh->xh_count);
1633
1634        namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1635        namevalue_size = namevalue_size_xe(entry);
1636        first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1637
1638        /* Shift the name+value pairs */
1639        memmove((char *)xh + first_namevalue_offset + namevalue_size,
1640                (char *)xh + first_namevalue_offset,
1641                namevalue_offset - first_namevalue_offset);
1642        memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1643
1644        /* Now tell xh->xh_entries about it */
1645        for (i = 0; i < count; i++) {
1646                offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1647                if (offset <= namevalue_offset)
1648                        le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1649                                     namevalue_size);
1650        }
1651
1652        /*
1653         * Note that we don't update xh_free_start or xh_name_value_len
1654         * because they're not used in block-stored xattrs.
1655         */
1656}
1657
1658static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1659{
1660        int count = le16_to_cpu(loc->xl_header->xh_count);
1661        loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1662        le16_add_cpu(&loc->xl_header->xh_count, 1);
1663        memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1664}
1665
1666static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1667{
1668        int free_start = ocfs2_xa_get_free_start(loc);
1669
1670        loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1671}
1672
1673static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1674                                          struct ocfs2_xattr_value_buf *vb)
1675{
1676        struct buffer_head *bh = loc->xl_storage;
1677
1678        if (loc->xl_size == (bh->b_size -
1679                             offsetof(struct ocfs2_xattr_block,
1680                                      xb_attrs.xb_header)))
1681                vb->vb_access = ocfs2_journal_access_xb;
1682        else
1683                vb->vb_access = ocfs2_journal_access_di;
1684        vb->vb_bh = bh;
1685}
1686
1687/*
1688 * Operations for xattrs stored in blocks.  This includes inline inode
1689 * storage and unindexed ocfs2_xattr_blocks.
1690 */
1691static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1692        .xlo_journal_access     = ocfs2_xa_block_journal_access,
1693        .xlo_journal_dirty      = ocfs2_xa_block_journal_dirty,
1694        .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1695        .xlo_check_space        = ocfs2_xa_block_check_space,
1696        .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1697        .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1698        .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1699        .xlo_add_entry          = ocfs2_xa_block_add_entry,
1700        .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1701        .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1702};
1703
1704static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1705                                          struct ocfs2_xa_loc *loc, int type)
1706{
1707        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1708
1709        return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1710}
1711
1712static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1713                                          struct ocfs2_xa_loc *loc)
1714{
1715        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1716
1717        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1718}
1719
1720static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1721                                            int offset)
1722{
1723        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1724        int block, block_offset;
1725
1726        /* The header is at the front of the bucket */
1727        block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1728        block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1729
1730        return bucket_block(bucket, block) + block_offset;
1731}
1732
1733static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1734                                     struct ocfs2_xattr_info *xi)
1735{
1736        return namevalue_size_xe(loc->xl_entry) >=
1737                namevalue_size_xi(xi);
1738}
1739
1740static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1741{
1742        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1743        return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1744}
1745
1746static int ocfs2_bucket_align_free_start(struct super_block *sb,
1747                                         int free_start, int size)
1748{
1749        /*
1750         * We need to make sure that the name+value pair fits within
1751         * one block.
1752         */
1753        if (((free_start - size) >> sb->s_blocksize_bits) !=
1754            ((free_start - 1) >> sb->s_blocksize_bits))
1755                free_start -= free_start % sb->s_blocksize;
1756
1757        return free_start;
1758}
1759
1760static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1761                                       struct ocfs2_xattr_info *xi)
1762{
1763        int rc;
1764        int count = le16_to_cpu(loc->xl_header->xh_count);
1765        int free_start = ocfs2_xa_get_free_start(loc);
1766        int needed_space = ocfs2_xi_entry_usage(xi);
1767        int size = namevalue_size_xi(xi);
1768        struct super_block *sb = loc->xl_inode->i_sb;
1769
1770        /*
1771         * Bucket storage does not reclaim name+value pairs it cannot
1772         * reuse.  They live as holes until the bucket fills, and then
1773         * the bucket is defragmented.  However, the bucket can reclaim
1774         * the ocfs2_xattr_entry.
1775         */
1776        if (loc->xl_entry) {
1777                /* Don't need space if we're reusing! */
1778                if (ocfs2_xa_can_reuse_entry(loc, xi))
1779                        needed_space = 0;
1780                else
1781                        needed_space -= sizeof(struct ocfs2_xattr_entry);
1782        }
1783        BUG_ON(needed_space < 0);
1784
1785        if (free_start < size) {
1786                if (needed_space)
1787                        return -ENOSPC;
1788        } else {
1789                /*
1790                 * First we check if it would fit in the first place.
1791                 * Below, we align the free start to a block.  This may
1792                 * slide us below the minimum gap.  By checking unaligned
1793                 * first, we avoid that error.
1794                 */
1795                rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1796                                                 count);
1797                if (rc)
1798                        return rc;
1799                free_start = ocfs2_bucket_align_free_start(sb, free_start,
1800                                                           size);
1801        }
1802        return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1803}
1804
1805static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1806{
1807        le16_add_cpu(&loc->xl_header->xh_name_value_len,
1808                     -namevalue_size_xe(loc->xl_entry));
1809}
1810
1811static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1812{
1813        struct ocfs2_xattr_header *xh = loc->xl_header;
1814        int count = le16_to_cpu(xh->xh_count);
1815        int low = 0, high = count - 1, tmp;
1816        struct ocfs2_xattr_entry *tmp_xe;
1817
1818        /*
1819         * We keep buckets sorted by name_hash, so we need to find
1820         * our insert place.
1821         */
1822        while (low <= high && count) {
1823                tmp = (low + high) / 2;
1824                tmp_xe = &xh->xh_entries[tmp];
1825
1826                if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1827                        low = tmp + 1;
1828                else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1829                        high = tmp - 1;
1830                else {
1831                        low = tmp;
1832                        break;
1833                }
1834        }
1835
1836        if (low != count)
1837                memmove(&xh->xh_entries[low + 1],
1838                        &xh->xh_entries[low],
1839                        ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1840
1841        le16_add_cpu(&xh->xh_count, 1);
1842        loc->xl_entry = &xh->xh_entries[low];
1843        memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1844}
1845
1846static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1847{
1848        int free_start = ocfs2_xa_get_free_start(loc);
1849        struct ocfs2_xattr_header *xh = loc->xl_header;
1850        struct super_block *sb = loc->xl_inode->i_sb;
1851        int nameval_offset;
1852
1853        free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1854        nameval_offset = free_start - size;
1855        loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1856        xh->xh_free_start = cpu_to_le16(nameval_offset);
1857        le16_add_cpu(&xh->xh_name_value_len, size);
1858
1859}
1860
1861static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1862                                           struct ocfs2_xattr_value_buf *vb)
1863{
1864        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1865        struct super_block *sb = loc->xl_inode->i_sb;
1866        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1867        int size = namevalue_size_xe(loc->xl_entry);
1868        int block_offset = nameval_offset >> sb->s_blocksize_bits;
1869
1870        /* Values are not allowed to straddle block boundaries */
1871        BUG_ON(block_offset !=
1872               ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1873        /* We expect the bucket to be filled in */
1874        BUG_ON(!bucket->bu_bhs[block_offset]);
1875
1876        vb->vb_access = ocfs2_journal_access;
1877        vb->vb_bh = bucket->bu_bhs[block_offset];
1878}
1879
1880/* Operations for xattrs stored in buckets. */
1881static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1882        .xlo_journal_access     = ocfs2_xa_bucket_journal_access,
1883        .xlo_journal_dirty      = ocfs2_xa_bucket_journal_dirty,
1884        .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1885        .xlo_check_space        = ocfs2_xa_bucket_check_space,
1886        .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1887        .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1888        .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1889        .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1890        .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1891        .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1892};
1893
1894static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1895{
1896        struct ocfs2_xattr_value_buf vb;
1897
1898        if (ocfs2_xattr_is_local(loc->xl_entry))
1899                return 0;
1900
1901        ocfs2_xa_fill_value_buf(loc, &vb);
1902        return le32_to_cpu(vb.vb_xv->xr_clusters);
1903}
1904
1905static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1906                                   struct ocfs2_xattr_set_ctxt *ctxt)
1907{
1908        int trunc_rc, access_rc;
1909        struct ocfs2_xattr_value_buf vb;
1910
1911        ocfs2_xa_fill_value_buf(loc, &vb);
1912        trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1913                                              ctxt);
1914
1915        /*
1916         * The caller of ocfs2_xa_value_truncate() has already called
1917         * ocfs2_xa_journal_access on the loc.  However, The truncate code
1918         * calls ocfs2_extend_trans().  This may commit the previous
1919         * transaction and open a new one.  If this is a bucket, truncate
1920         * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1921         * the caller is expecting to dirty the entire bucket.  So we must
1922         * reset the journal work.  We do this even if truncate has failed,
1923         * as it could have failed after committing the extend.
1924         */
1925        access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1926                                            OCFS2_JOURNAL_ACCESS_WRITE);
1927
1928        /* Errors in truncate take precedence */
1929        return trunc_rc ? trunc_rc : access_rc;
1930}
1931
1932static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1933{
1934        int index, count;
1935        struct ocfs2_xattr_header *xh = loc->xl_header;
1936        struct ocfs2_xattr_entry *entry = loc->xl_entry;
1937
1938        ocfs2_xa_wipe_namevalue(loc);
1939        loc->xl_entry = NULL;
1940
1941        le16_add_cpu(&xh->xh_count, -1);
1942        count = le16_to_cpu(xh->xh_count);
1943
1944        /*
1945         * Only zero out the entry if there are more remaining.  This is
1946         * important for an empty bucket, as it keeps track of the
1947         * bucket's hash value.  It doesn't hurt empty block storage.
1948         */
1949        if (count) {
1950                index = ((char *)entry - (char *)&xh->xh_entries) /
1951                        sizeof(struct ocfs2_xattr_entry);
1952                memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1953                        (count - index) * sizeof(struct ocfs2_xattr_entry));
1954                memset(&xh->xh_entries[count], 0,
1955                       sizeof(struct ocfs2_xattr_entry));
1956        }
1957}
1958
1959/*
1960 * If we have a problem adjusting the size of an external value during
1961 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1962 * in an intermediate state.  For example, the value may be partially
1963 * truncated.
1964 *
1965 * If the value tree hasn't changed, the extend/truncate went nowhere.
1966 * We have nothing to do.  The caller can treat it as a straight error.
1967 *
1968 * If the value tree got partially truncated, we now have a corrupted
1969 * extended attribute.  We're going to wipe its entry and leak the
1970 * clusters.  Better to leak some storage than leave a corrupt entry.
1971 *
1972 * If the value tree grew, it obviously didn't grow enough for the
1973 * new entry.  We're not going to try and reclaim those clusters either.
1974 * If there was already an external value there (orig_clusters != 0),
1975 * the new clusters are attached safely and we can just leave the old
1976 * value in place.  If there was no external value there, we remove
1977 * the entry.
1978 *
1979 * This way, the xattr block we store in the journal will be consistent.
1980 * If the size change broke because of the journal, no changes will hit
1981 * disk anyway.
1982 */
1983static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
1984                                            const char *what,
1985                                            unsigned int orig_clusters)
1986{
1987        unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
1988        char *nameval_buf = ocfs2_xa_offset_pointer(loc,
1989                                le16_to_cpu(loc->xl_entry->xe_name_offset));
1990
1991        if (new_clusters < orig_clusters) {
1992                mlog(ML_ERROR,
1993                     "Partial truncate while %s xattr %.*s.  Leaking "
1994                     "%u clusters and removing the entry\n",
1995                     what, loc->xl_entry->xe_name_len, nameval_buf,
1996                     orig_clusters - new_clusters);
1997                ocfs2_xa_remove_entry(loc);
1998        } else if (!orig_clusters) {
1999                mlog(ML_ERROR,
2000                     "Unable to allocate an external value for xattr "
2001                     "%.*s safely.  Leaking %u clusters and removing the "
2002                     "entry\n",
2003                     loc->xl_entry->xe_name_len, nameval_buf,
2004                     new_clusters - orig_clusters);
2005                ocfs2_xa_remove_entry(loc);
2006        } else if (new_clusters > orig_clusters)
2007                mlog(ML_ERROR,
2008                     "Unable to grow xattr %.*s safely.  %u new clusters "
2009                     "have been added, but the value will not be "
2010                     "modified\n",
2011                     loc->xl_entry->xe_name_len, nameval_buf,
2012                     new_clusters - orig_clusters);
2013}
2014
2015static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
2016                           struct ocfs2_xattr_set_ctxt *ctxt)
2017{
2018        int rc = 0;
2019        unsigned int orig_clusters;
2020
2021        if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2022                orig_clusters = ocfs2_xa_value_clusters(loc);
2023                rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2024                if (rc) {
2025                        mlog_errno(rc);
2026                        /*
2027                         * Since this is remove, we can return 0 if
2028                         * ocfs2_xa_cleanup_value_truncate() is going to
2029                         * wipe the entry anyway.  So we check the
2030                         * cluster count as well.
2031                         */
2032                        if (orig_clusters != ocfs2_xa_value_clusters(loc))
2033                                rc = 0;
2034                        ocfs2_xa_cleanup_value_truncate(loc, "removing",
2035                                                        orig_clusters);
2036                        if (rc)
2037                                goto out;
2038                }
2039        }
2040
2041        ocfs2_xa_remove_entry(loc);
2042
2043out:
2044        return rc;
2045}
2046
2047static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2048{
2049        int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2050        char *nameval_buf;
2051
2052        nameval_buf = ocfs2_xa_offset_pointer(loc,
2053                                le16_to_cpu(loc->xl_entry->xe_name_offset));
2054        memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2055}
2056
2057/*
2058 * Take an existing entry and make it ready for the new value.  This
2059 * won't allocate space, but it may free space.  It should be ready for
2060 * ocfs2_xa_prepare_entry() to finish the work.
2061 */
2062static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2063                                struct ocfs2_xattr_info *xi,
2064                                struct ocfs2_xattr_set_ctxt *ctxt)
2065{
2066        int rc = 0;
2067        int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2068        unsigned int orig_clusters;
2069        char *nameval_buf;
2070        int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2071        int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2072
2073        BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2074               name_size);
2075
2076        nameval_buf = ocfs2_xa_offset_pointer(loc,
2077                                le16_to_cpu(loc->xl_entry->xe_name_offset));
2078        if (xe_local) {
2079                memset(nameval_buf + name_size, 0,
2080                       namevalue_size_xe(loc->xl_entry) - name_size);
2081                if (!xi_local)
2082                        ocfs2_xa_install_value_root(loc);
2083        } else {
2084                orig_clusters = ocfs2_xa_value_clusters(loc);
2085                if (xi_local) {
2086                        rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2087                        if (rc < 0)
2088                                mlog_errno(rc);
2089                        else
2090                                memset(nameval_buf + name_size, 0,
2091                                       namevalue_size_xe(loc->xl_entry) -
2092                                       name_size);
2093                } else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2094                           xi->xi_value_len) {
2095                        rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2096                                                     ctxt);
2097                        if (rc < 0)
2098                                mlog_errno(rc);
2099                }
2100
2101                if (rc) {
2102                        ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2103                                                        orig_clusters);
2104                        goto out;
2105                }
2106        }
2107
2108        loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2109        ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2110
2111out:
2112        return rc;
2113}
2114
2115/*
2116 * Prepares loc->xl_entry to receive the new xattr.  This includes
2117 * properly setting up the name+value pair region.  If loc->xl_entry
2118 * already exists, it will take care of modifying it appropriately.
2119 *
2120 * Note that this modifies the data.  You did journal_access already,
2121 * right?
2122 */
2123static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2124                                  struct ocfs2_xattr_info *xi,
2125                                  u32 name_hash,
2126                                  struct ocfs2_xattr_set_ctxt *ctxt)
2127{
2128        int rc = 0;
2129        unsigned int orig_clusters;
2130        __le64 orig_value_size = 0;
2131
2132        rc = ocfs2_xa_check_space(loc, xi);
2133        if (rc)
2134                goto out;
2135
2136        if (loc->xl_entry) {
2137                if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2138                        orig_value_size = loc->xl_entry->xe_value_size;
2139                        rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2140                        if (rc)
2141                                goto out;
2142                        goto alloc_value;
2143                }
2144
2145                if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2146                        orig_clusters = ocfs2_xa_value_clusters(loc);
2147                        rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2148                        if (rc) {
2149                                mlog_errno(rc);
2150                                ocfs2_xa_cleanup_value_truncate(loc,
2151                                                                "overwriting",
2152                                                                orig_clusters);
2153                                goto out;
2154                        }
2155                }
2156                ocfs2_xa_wipe_namevalue(loc);
2157        } else
2158                ocfs2_xa_add_entry(loc, name_hash);
2159
2160        /*
2161         * If we get here, we have a blank entry.  Fill it.  We grow our
2162         * name+value pair back from the end.
2163         */
2164        ocfs2_xa_add_namevalue(loc, xi);
2165        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2166                ocfs2_xa_install_value_root(loc);
2167
2168alloc_value:
2169        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2170                orig_clusters = ocfs2_xa_value_clusters(loc);
2171                rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2172                if (rc < 0) {
2173                        ctxt->set_abort = 1;
2174                        ocfs2_xa_cleanup_value_truncate(loc, "growing",
2175                                                        orig_clusters);
2176                        /*
2177                         * If we were growing an existing value,
2178                         * ocfs2_xa_cleanup_value_truncate() won't remove
2179                         * the entry. We need to restore the original value
2180                         * size.
2181                         */
2182                        if (loc->xl_entry) {
2183                                BUG_ON(!orig_value_size);
2184                                loc->xl_entry->xe_value_size = orig_value_size;
2185                        }
2186                        mlog_errno(rc);
2187                }
2188        }
2189
2190out:
2191        return rc;
2192}
2193
2194/*
2195 * Store the value portion of the name+value pair.  This will skip
2196 * values that are stored externally.  Their tree roots were set up
2197 * by ocfs2_xa_prepare_entry().
2198 */
2199static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2200                                struct ocfs2_xattr_info *xi,
2201                                struct ocfs2_xattr_set_ctxt *ctxt)
2202{
2203        int rc = 0;
2204        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2205        int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2206        char *nameval_buf;
2207        struct ocfs2_xattr_value_buf vb;
2208
2209        nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2210        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2211                ocfs2_xa_fill_value_buf(loc, &vb);
2212                rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2213                                                     ctxt->handle, &vb,
2214                                                     xi->xi_value,
2215                                                     xi->xi_value_len);
2216        } else
2217                memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2218
2219        return rc;
2220}
2221
2222static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2223                        struct ocfs2_xattr_info *xi,
2224                        struct ocfs2_xattr_set_ctxt *ctxt)
2225{
2226        int ret;
2227        u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2228                                              xi->xi_name_len);
2229
2230        ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2231                                      OCFS2_JOURNAL_ACCESS_WRITE);
2232        if (ret) {
2233                mlog_errno(ret);
2234                goto out;
2235        }
2236
2237        /*
2238         * From here on out, everything is going to modify the buffer a
2239         * little.  Errors are going to leave the xattr header in a
2240         * sane state.  Thus, even with errors we dirty the sucker.
2241         */
2242
2243        /* Don't worry, we are never called with !xi_value and !xl_entry */
2244        if (!xi->xi_value) {
2245                ret = ocfs2_xa_remove(loc, ctxt);
2246                goto out_dirty;
2247        }
2248
2249        ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2250        if (ret) {
2251                if (ret != -ENOSPC)
2252                        mlog_errno(ret);
2253                goto out_dirty;
2254        }
2255
2256        ret = ocfs2_xa_store_value(loc, xi, ctxt);
2257        if (ret)
2258                mlog_errno(ret);
2259
2260out_dirty:
2261        ocfs2_xa_journal_dirty(ctxt->handle, loc);
2262
2263out:
2264        return ret;
2265}
2266
2267static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2268                                     struct inode *inode,
2269                                     struct buffer_head *bh,
2270                                     struct ocfs2_xattr_entry *entry)
2271{
2272        struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2273
2274        BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2275
2276        loc->xl_inode = inode;
2277        loc->xl_ops = &ocfs2_xa_block_loc_ops;
2278        loc->xl_storage = bh;
2279        loc->xl_entry = entry;
2280        loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2281        loc->xl_header =
2282                (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2283                                              loc->xl_size);
2284}
2285
2286static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2287                                          struct inode *inode,
2288                                          struct buffer_head *bh,
2289                                          struct ocfs2_xattr_entry *entry)
2290{
2291        struct ocfs2_xattr_block *xb =
2292                (struct ocfs2_xattr_block *)bh->b_data;
2293
2294        BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2295
2296        loc->xl_inode = inode;
2297        loc->xl_ops = &ocfs2_xa_block_loc_ops;
2298        loc->xl_storage = bh;
2299        loc->xl_header = &(xb->xb_attrs.xb_header);
2300        loc->xl_entry = entry;
2301        loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2302                                             xb_attrs.xb_header);
2303}
2304
2305static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2306                                           struct ocfs2_xattr_bucket *bucket,
2307                                           struct ocfs2_xattr_entry *entry)
2308{
2309        loc->xl_inode = bucket->bu_inode;
2310        loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2311        loc->xl_storage = bucket;
2312        loc->xl_header = bucket_xh(bucket);
2313        loc->xl_entry = entry;
2314        loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2315}
2316
2317/*
2318 * In xattr remove, if it is stored outside and refcounted, we may have
2319 * the chance to split the refcount tree. So need the allocators.
2320 */
2321static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2322                                        struct ocfs2_xattr_value_root *xv,
2323                                        struct ocfs2_caching_info *ref_ci,
2324                                        struct buffer_head *ref_root_bh,
2325                                        struct ocfs2_alloc_context **meta_ac,
2326                                        int *ref_credits)
2327{
2328        int ret, meta_add = 0;
2329        u32 p_cluster, num_clusters;
2330        unsigned int ext_flags;
2331
2332        *ref_credits = 0;
2333        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2334                                       &num_clusters,
2335                                       &xv->xr_list,
2336                                       &ext_flags);
2337        if (ret) {
2338                mlog_errno(ret);
2339                goto out;
2340        }
2341
2342        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2343                goto out;
2344
2345        ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2346                                                 ref_root_bh, xv,
2347                                                 &meta_add, ref_credits);
2348        if (ret) {
2349                mlog_errno(ret);
2350                goto out;
2351        }
2352
2353        ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2354                                                meta_add, meta_ac);
2355        if (ret)
2356                mlog_errno(ret);
2357
2358out:
2359        return ret;
2360}
2361
2362static int ocfs2_remove_value_outside(struct inode*inode,
2363                                      struct ocfs2_xattr_value_buf *vb,
2364                                      struct ocfs2_xattr_header *header,
2365                                      struct ocfs2_caching_info *ref_ci,
2366                                      struct buffer_head *ref_root_bh)
2367{
2368        int ret = 0, i, ref_credits;
2369        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2370        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2371        void *val;
2372
2373        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2374
2375        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2376                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2377
2378                if (ocfs2_xattr_is_local(entry))
2379                        continue;
2380
2381                val = (void *)header +
2382                        le16_to_cpu(entry->xe_name_offset);
2383                vb->vb_xv = (struct ocfs2_xattr_value_root *)
2384                        (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2385
2386                ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2387                                                         ref_ci, ref_root_bh,
2388                                                         &ctxt.meta_ac,
2389                                                         &ref_credits);
2390
2391                ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2392                                        ocfs2_remove_extent_credits(osb->sb));
2393                if (IS_ERR(ctxt.handle)) {
2394                        ret = PTR_ERR(ctxt.handle);
2395                        mlog_errno(ret);
2396                        break;
2397                }
2398
2399                ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2400
2401                ocfs2_commit_trans(osb, ctxt.handle);
2402                if (ctxt.meta_ac) {
2403                        ocfs2_free_alloc_context(ctxt.meta_ac);
2404                        ctxt.meta_ac = NULL;
2405                }
2406
2407                if (ret < 0) {
2408                        mlog_errno(ret);
2409                        break;
2410                }
2411
2412        }
2413
2414        if (ctxt.meta_ac)
2415                ocfs2_free_alloc_context(ctxt.meta_ac);
2416        ocfs2_schedule_truncate_log_flush(osb, 1);
2417        ocfs2_run_deallocs(osb, &ctxt.dealloc);
2418        return ret;
2419}
2420
2421static int ocfs2_xattr_ibody_remove(struct inode *inode,
2422                                    struct buffer_head *di_bh,
2423                                    struct ocfs2_caching_info *ref_ci,
2424                                    struct buffer_head *ref_root_bh)
2425{
2426
2427        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2428        struct ocfs2_xattr_header *header;
2429        int ret;
2430        struct ocfs2_xattr_value_buf vb = {
2431                .vb_bh = di_bh,
2432                .vb_access = ocfs2_journal_access_di,
2433        };
2434
2435        header = (struct ocfs2_xattr_header *)
2436                 ((void *)di + inode->i_sb->s_blocksize -
2437                 le16_to_cpu(di->i_xattr_inline_size));
2438
2439        ret = ocfs2_remove_value_outside(inode, &vb, header,
2440                                         ref_ci, ref_root_bh);
2441
2442        return ret;
2443}
2444
/*
 * Argument bundle handed to ocfs2_iterate_xattr_index_block() together
 * with ocfs2_rm_xattr_cluster when an indexed xattr block is removed.
 */
struct ocfs2_rm_xattr_bucket_para {
        /* Refcount tree caching info supplied by the caller (may be NULL). */
        struct ocfs2_caching_info *ref_ci;
        /* Refcount tree root buffer supplied by the caller (may be NULL). */
        struct buffer_head *ref_root_bh;
};
2449
2450static int ocfs2_xattr_block_remove(struct inode *inode,
2451                                    struct buffer_head *blk_bh,
2452                                    struct ocfs2_caching_info *ref_ci,
2453                                    struct buffer_head *ref_root_bh)
2454{
2455        struct ocfs2_xattr_block *xb;
2456        int ret = 0;
2457        struct ocfs2_xattr_value_buf vb = {
2458                .vb_bh = blk_bh,
2459                .vb_access = ocfs2_journal_access_xb,
2460        };
2461        struct ocfs2_rm_xattr_bucket_para args = {
2462                .ref_ci = ref_ci,
2463                .ref_root_bh = ref_root_bh,
2464        };
2465
2466        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2467        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2468                struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2469                ret = ocfs2_remove_value_outside(inode, &vb, header,
2470                                                 ref_ci, ref_root_bh);
2471        } else
2472                ret = ocfs2_iterate_xattr_index_block(inode,
2473                                                blk_bh,
2474                                                ocfs2_rm_xattr_cluster,
2475                                                &args);
2476
2477        return ret;
2478}
2479
/*
 * Free the xattr block at @block: first release everything it references
 * via ocfs2_xattr_block_remove(), then give the block itself back to the
 * suballocator it was taken from.
 *
 * @ref_ci and @ref_root_bh are only passed through to
 * ocfs2_xattr_block_remove().
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_xattr_free_block(struct inode *inode,
                                  u64 block,
                                  struct ocfs2_caching_info *ref_ci,
                                  struct buffer_head *ref_root_bh)
{
        struct inode *xb_alloc_inode;
        struct buffer_head *xb_alloc_bh = NULL;
        struct buffer_head *blk_bh = NULL;
        struct ocfs2_xattr_block *xb;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        handle_t *handle;
        int ret = 0;
        u64 blk, bg_blkno;
        u16 bit;

        ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }

        /* Drop the block's contents before the block itself. */
        ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
        if (ret < 0) {
                mlog_errno(ret);
                goto out;
        }

        /*
         * Work out which suballocator group and bit the block belongs to.
         * If the block does not record its group (xb_suballoc_loc == 0),
         * derive it from the block number and bit.
         */
        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
        blk = le64_to_cpu(xb->xb_blkno);
        bit = le16_to_cpu(xb->xb_suballoc_bit);
        if (xb->xb_suballoc_loc)
                bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
        else
                bg_blkno = ocfs2_which_suballoc_group(blk, bit);

        /* The allocator is the extent-alloc system file of the owning slot. */
        xb_alloc_inode = ocfs2_get_system_file_inode(osb,
                                EXTENT_ALLOC_SYSTEM_INODE,
                                le16_to_cpu(xb->xb_suballoc_slot));
        if (!xb_alloc_inode) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto out;
        }
        inode_lock(xb_alloc_inode);

        /* Third argument 1: the matching unlock below also passes 1. */
        ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
        if (ret < 0) {
                mlog_errno(ret);
                goto out_mutex;
        }

        handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                mlog_errno(ret);
                goto out_unlock;
        }

        /* Release exactly one bit; a failure is logged and returned. */
        ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
                                       bit, bg_blkno, 1);
        if (ret < 0)
                mlog_errno(ret);

        ocfs2_commit_trans(osb, handle);
        /* Unwind in reverse order of acquisition. */
out_unlock:
        ocfs2_inode_unlock(xb_alloc_inode, 1);
        brelse(xb_alloc_bh);
out_mutex:
        inode_unlock(xb_alloc_inode);
        iput(xb_alloc_inode);
out:
        brelse(blk_bh);
        return ret;
}
2554
2555/*
2556 * ocfs2_xattr_remove()
2557 *
2558 * Free extended attribute resources associated with this inode.
2559 */
2560int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2561{
2562        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2563        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2564        struct ocfs2_refcount_tree *ref_tree = NULL;
2565        struct buffer_head *ref_root_bh = NULL;
2566        struct ocfs2_caching_info *ref_ci = NULL;
2567        handle_t *handle;
2568        int ret;
2569
2570        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2571                return 0;
2572
2573        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2574                return 0;
2575
2576        if (ocfs2_is_refcount_inode(inode)) {
2577                ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2578                                               le64_to_cpu(di->i_refcount_loc),
2579                                               1, &ref_tree, &ref_root_bh);
2580                if (ret) {
2581                        mlog_errno(ret);
2582                        goto out;
2583                }
2584                ref_ci = &ref_tree->rf_ci;
2585
2586        }
2587
2588        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2589                ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2590                                               ref_ci, ref_root_bh);
2591                if (ret < 0) {
2592                        mlog_errno(ret);
2593                        goto out;
2594                }
2595        }
2596
2597        if (di->i_xattr_loc) {
2598                ret = ocfs2_xattr_free_block(inode,
2599                                             le64_to_cpu(di->i_xattr_loc),
2600                                             ref_ci, ref_root_bh);
2601                if (ret < 0) {
2602                        mlog_errno(ret);
2603                        goto out;
2604                }
2605        }
2606
2607        handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2608                                   OCFS2_INODE_UPDATE_CREDITS);
2609        if (IS_ERR(handle)) {
2610                ret = PTR_ERR(handle);
2611                mlog_errno(ret);
2612                goto out;
2613        }
2614        ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2615                                      OCFS2_JOURNAL_ACCESS_WRITE);
2616        if (ret) {
2617                mlog_errno(ret);
2618                goto out_commit;
2619        }
2620
2621        di->i_xattr_loc = 0;
2622
2623        spin_lock(&oi->ip_lock);
2624        oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2625        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2626        spin_unlock(&oi->ip_lock);
2627        ocfs2_update_inode_fsync_trans(handle, inode, 0);
2628
2629        ocfs2_journal_dirty(handle, di_bh);
2630out_commit:
2631        ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2632out:
2633        if (ref_tree)
2634                ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2635        brelse(ref_root_bh);
2636        return ret;
2637}
2638
2639static int ocfs2_xattr_has_space_inline(struct inode *inode,
2640                                        struct ocfs2_dinode *di)
2641{
2642        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2643        unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2644        int free;
2645
2646        if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2647                return 0;
2648
2649        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2650                struct ocfs2_inline_data *idata = &di->id2.i_data;
2651                free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2652        } else if (ocfs2_inode_is_fast_symlink(inode)) {
2653                free = ocfs2_fast_symlink_chars(inode->i_sb) -
2654                        le64_to_cpu(di->i_size);
2655        } else {
2656                struct ocfs2_extent_list *el = &di->id2.i_list;
2657                free = (le16_to_cpu(el->l_count) -
2658                        le16_to_cpu(el->l_next_free_rec)) *
2659                        sizeof(struct ocfs2_extent_rec);
2660        }
2661        if (free >= xattrsize)
2662                return 1;
2663
2664        return 0;
2665}
2666
2667/*
2668 * ocfs2_xattr_ibody_find()
2669 *
2670 * Find extended attribute in inode block and
2671 * fill search info into struct ocfs2_xattr_search.
2672 */
2673static int ocfs2_xattr_ibody_find(struct inode *inode,
2674                                  int name_index,
2675                                  const char *name,
2676                                  struct ocfs2_xattr_search *xs)
2677{
2678        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2679        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2680        int ret;
2681        int has_space = 0;
2682
2683        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2684                return 0;
2685
2686        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2687                down_read(&oi->ip_alloc_sem);
2688                has_space = ocfs2_xattr_has_space_inline(inode, di);
2689                up_read(&oi->ip_alloc_sem);
2690                if (!has_space)
2691                        return 0;
2692        }
2693
2694        xs->xattr_bh = xs->inode_bh;
2695        xs->end = (void *)di + inode->i_sb->s_blocksize;
2696        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2697                xs->header = (struct ocfs2_xattr_header *)
2698                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2699        else
2700                xs->header = (struct ocfs2_xattr_header *)
2701                        (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2702        xs->base = (void *)xs->header;
2703        xs->here = xs->header->xh_entries;
2704
2705        /* Find the named attribute. */
2706        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2707                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2708                if (ret && ret != -ENODATA)
2709                        return ret;
2710                xs->not_found = ret;
2711        }
2712
2713        return 0;
2714}
2715
2716static int ocfs2_xattr_ibody_init(struct inode *inode,
2717                                  struct buffer_head *di_bh,
2718                                  struct ocfs2_xattr_set_ctxt *ctxt)
2719{
2720        int ret;
2721        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2722        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2723        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2724        unsigned int xattrsize = osb->s_xattr_inline_size;
2725
2726        if (!ocfs2_xattr_has_space_inline(inode, di)) {
2727                ret = -ENOSPC;
2728                goto out;
2729        }
2730
2731        ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2732                                      OCFS2_JOURNAL_ACCESS_WRITE);
2733        if (ret) {
2734                mlog_errno(ret);
2735                goto out;
2736        }
2737
2738        /*
2739         * Adjust extent record count or inline data size
2740         * to reserve space for extended attribute.
2741         */
2742        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2743                struct ocfs2_inline_data *idata = &di->id2.i_data;
2744                le16_add_cpu(&idata->id_count, -xattrsize);
2745        } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2746                struct ocfs2_extent_list *el = &di->id2.i_list;
2747                le16_add_cpu(&el->l_count, -(xattrsize /
2748                                             sizeof(struct ocfs2_extent_rec)));
2749        }
2750        di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2751
2752        spin_lock(&oi->ip_lock);
2753        oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2754        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2755        spin_unlock(&oi->ip_lock);
2756
2757        ocfs2_journal_dirty(ctxt->handle, di_bh);
2758
2759out:
2760        return ret;
2761}
2762
2763/*
2764 * ocfs2_xattr_ibody_set()
2765 *
2766 * Set, replace or remove an extended attribute into inode block.
2767 *
2768 */
2769static int ocfs2_xattr_ibody_set(struct inode *inode,
2770                                 struct ocfs2_xattr_info *xi,
2771                                 struct ocfs2_xattr_search *xs,
2772                                 struct ocfs2_xattr_set_ctxt *ctxt)
2773{
2774        int ret;
2775        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2776        struct ocfs2_xa_loc loc;
2777
2778        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2779                return -ENOSPC;
2780
2781        down_write(&oi->ip_alloc_sem);
2782        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2783                ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2784                if (ret) {
2785                        if (ret != -ENOSPC)
2786                                mlog_errno(ret);
2787                        goto out;
2788                }
2789        }
2790
2791        ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2792                                 xs->not_found ? NULL : xs->here);
2793        ret = ocfs2_xa_set(&loc, xi, ctxt);
2794        if (ret) {
2795                if (ret != -ENOSPC)
2796                        mlog_errno(ret);
2797                goto out;
2798        }
2799        xs->here = loc.xl_entry;
2800
2801out:
2802        up_write(&oi->ip_alloc_sem);
2803
2804        return ret;
2805}
2806
2807/*
2808 * ocfs2_xattr_block_find()
2809 *
2810 * Find extended attribute in external block and
2811 * fill search info into struct ocfs2_xattr_search.
2812 */
2813static int ocfs2_xattr_block_find(struct inode *inode,
2814                                  int name_index,
2815                                  const char *name,
2816                                  struct ocfs2_xattr_search *xs)
2817{
2818        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2819        struct buffer_head *blk_bh = NULL;
2820        struct ocfs2_xattr_block *xb;
2821        int ret = 0;
2822
2823        if (!di->i_xattr_loc)
2824                return ret;
2825
2826        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2827                                     &blk_bh);
2828        if (ret < 0) {
2829                mlog_errno(ret);
2830                return ret;
2831        }
2832
2833        xs->xattr_bh = blk_bh;
2834        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2835
2836        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2837                xs->header = &xb->xb_attrs.xb_header;
2838                xs->base = (void *)xs->header;
2839                xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2840                xs->here = xs->header->xh_entries;
2841
2842                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2843        } else
2844                ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2845                                                   name_index,
2846                                                   name, xs);
2847
2848        if (ret && ret != -ENODATA) {
2849                xs->xattr_bh = NULL;
2850                goto cleanup;
2851        }
2852        xs->not_found = ret;
2853        return 0;
2854cleanup:
2855        brelse(blk_bh);
2856
2857        return ret;
2858}
2859
/*
 * Allocate and initialise a new xattr block for @inode and hook it into
 * the dinode held in @inode_bh.
 *
 * @ctxt supplies the running transaction (ctxt->handle) and the metadata
 * reservation (ctxt->meta_ac) the block is claimed from.  When @indexed
 * is non-zero the block is set up as an index root with one record in
 * use; otherwise the zero-filled block is left as is.
 *
 * On success *ret_bh receives the new buffer (the caller owns the
 * reference) and 0 is returned.  On failure a negative error is returned
 * and *ret_bh is not written.
 */
static int ocfs2_create_xattr_block(struct inode *inode,
                                    struct buffer_head *inode_bh,
                                    struct ocfs2_xattr_set_ctxt *ctxt,
                                    int indexed,
                                    struct buffer_head **ret_bh)
{
        int ret;
        u16 suballoc_bit_start;
        u32 num_got;
        u64 suballoc_loc, first_blkno;
        struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
        struct buffer_head *new_bh = NULL;
        struct ocfs2_xattr_block *xblk;

        /*
         * NOTE(review): the existing inode buffer is accessed with
         * OCFS2_JOURNAL_ACCESS_CREATE here, whereas other dinode updates
         * in this file use OCFS2_JOURNAL_ACCESS_WRITE -- confirm intent.
         */
        ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
                                      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
        if (ret < 0) {
                mlog_errno(ret);
                goto end;
        }

        /* Claim a single metadata block from the reservation. */
        ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
                                   &suballoc_loc, &suballoc_bit_start,
                                   &num_got, &first_blkno);
        if (ret < 0) {
                mlog_errno(ret);
                goto end;
        }

        /*
         * From here on the error path only drops the buffer reference;
         * the claimed block is not handed back in this function.
         */
        new_bh = sb_getblk(inode->i_sb, first_blkno);
        if (!new_bh) {
                ret = -ENOMEM;
                mlog_errno(ret);
                goto end;
        }

        ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);

        ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
                                      new_bh,
                                      OCFS2_JOURNAL_ACCESS_CREATE);
        if (ret < 0) {
                mlog_errno(ret);
                goto end;
        }

        /* Initialize ocfs2_xattr_block */
        xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
        memset(xblk, 0, inode->i_sb->s_blocksize);
        /* The signature string occupies the very start of the block. */
        strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
        /* Record where the block came from so it can be freed later. */
        xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
        xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
        xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
        xblk->xb_fs_generation =
                cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
        xblk->xb_blkno = cpu_to_le64(first_blkno);
        if (indexed) {
                /* Index root: depth 0, one cluster, first record in use. */
                struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
                xr->xt_clusters = cpu_to_le32(1);
                xr->xt_last_eb_blk = 0;
                xr->xt_list.l_tree_depth = 0;
                xr->xt_list.l_count = cpu_to_le16(
                                        ocfs2_xattr_recs_per_xb(inode->i_sb));
                xr->xt_list.l_next_free_rec = cpu_to_le16(1);
                xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
        }
        ocfs2_journal_dirty(ctxt->handle, new_bh);

        /* Add it to the inode */
        di->i_xattr_loc = cpu_to_le64(first_blkno);

        /* Update in-memory and on-disk feature flags under ip_lock. */
        spin_lock(&OCFS2_I(inode)->ip_lock);
        OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
        di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
        spin_unlock(&OCFS2_I(inode)->ip_lock);

        ocfs2_journal_dirty(ctxt->handle, inode_bh);

        /* Hand the reference to the caller; keep 'end' from dropping it. */
        *ret_bh = new_bh;
        new_bh = NULL;

end:
        brelse(new_bh);
        return ret;
}
2945
2946/*
2947 * ocfs2_xattr_block_set()
2948 *
2949 * Set, replace or remove an extended attribute into external block.
2950 *
2951 */
2952static int ocfs2_xattr_block_set(struct inode *inode,
2953                                 struct ocfs2_xattr_info *xi,
2954                                 struct ocfs2_xattr_search *xs,
2955                                 struct ocfs2_xattr_set_ctxt *ctxt)
2956{
2957        struct buffer_head *new_bh = NULL;
2958        struct ocfs2_xattr_block *xblk = NULL;
2959        int ret;
2960        struct ocfs2_xa_loc loc;
2961
2962        if (!xs->xattr_bh) {
2963                ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2964                                               0, &new_bh);
2965                if (ret) {
2966                        mlog_errno(ret);
2967                        goto end;
2968                }
2969
2970                xs->xattr_bh = new_bh;
2971                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2972                xs->header = &xblk->xb_attrs.xb_header;
2973                xs->base = (void *)xs->header;
2974                xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2975                xs->here = xs->header->xh_entries;
2976        } else
2977                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2978
2979        if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2980                ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2981                                              xs->not_found ? NULL : xs->here);
2982
2983                ret = ocfs2_xa_set(&loc, xi, ctxt);
2984                if (!ret)
2985                        xs->here = loc.xl_entry;
2986                else if ((ret != -ENOSPC) || ctxt->set_abort)
2987                        goto end;
2988                else {
2989                        ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2990                        if (ret)
2991                                goto end;
2992                }
2993        }
2994
2995        if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
2996                ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2997
2998end:
2999        return ret;
3000}
3001
3002/* Check whether the new xattr can be inserted into the inode. */
3003static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
3004                                       struct ocfs2_xattr_info *xi,
3005                                       struct ocfs2_xattr_search *xs)
3006{
3007        struct ocfs2_xattr_entry *last;
3008        int free, i;
3009        size_t min_offs = xs->end - xs->base;
3010
3011        if (!xs->header)
3012                return 0;
3013
3014        last = xs->header->xh_entries;
3015
3016        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
3017                size_t offs = le16_to_cpu(last->xe_name_offset);
3018                if (offs < min_offs)
3019                        min_offs = offs;
3020                last += 1;
3021        }
3022
3023        free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
3024        if (free < 0)
3025                return 0;
3026
3027        BUG_ON(!xs->not_found);
3028
3029        if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3030                return 1;
3031
3032        return 0;
3033}
3034
3035static int ocfs2_calc_xattr_set_need(struct inode *inode,
3036                                     struct ocfs2_dinode *di,
3037                                     struct ocfs2_xattr_info *xi,
3038                                     struct ocfs2_xattr_search *xis,
3039                                     struct ocfs2_xattr_search *xbs,
3040                                     int *clusters_need,
3041                                     int *meta_need,
3042                                     int *credits_need)
3043{
3044        int ret = 0, old_in_xb = 0;
3045        int clusters_add = 0, meta_add = 0, credits = 0;
3046        struct buffer_head *bh = NULL;
3047        struct ocfs2_xattr_block *xb = NULL;
3048        struct ocfs2_xattr_entry *xe = NULL;
3049        struct ocfs2_xattr_value_root *xv = NULL;
3050        char *base = NULL;
3051        int name_offset, name_len = 0;
3052        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3053                                                    xi->xi_value_len);
3054        u64 value_size;
3055
3056        /*
3057         * Calculate the clusters we need to write.
3058         * No matter whether we replace an old one or add a new one,
3059         * we need this for writing.
3060         */
3061        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3062                credits += new_clusters *
3063                           ocfs2_clusters_to_blocks(inode->i_sb, 1);
3064
3065        if (xis->not_found && xbs->not_found) {
3066                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3067
3068                if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3069                        clusters_add += new_clusters;
3070                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3071                                                        &def_xv.xv.xr_list);
3072                }
3073
3074                goto meta_guess;
3075        }
3076
3077        if (!xis->not_found) {
3078                xe = xis->here;
3079                name_offset = le16_to_cpu(xe->xe_name_offset);
3080                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3081                base = xis->base;
3082                credits += OCFS2_INODE_UPDATE_CREDITS;
3083        } else {
3084                int i, block_off = 0;
3085                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3086                xe = xbs->here;
3087                name_offset = le16_to_cpu(xe->xe_name_offset);
3088                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3089                i = xbs->here - xbs->header->xh_entries;
3090                old_in_xb = 1;
3091
3092                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3093                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3094                                                        bucket_xh(xbs->bucket),
3095                                                        i, &block_off,
3096                                                        &name_offset);
3097                        base = bucket_block(xbs->bucket, block_off);
3098                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3099                } else {
3100                        base = xbs->base;
3101                        credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3102                }
3103        }
3104
3105        /*
3106         * delete a xattr doesn't need metadata and cluster allocation.
3107         * so just calculate the credits and return.
3108         *
3109         * The credits for removing the value tree will be extended
3110         * by ocfs2_remove_extent itself.
3111         */
3112        if (!xi->xi_value) {
3113                if (!ocfs2_xattr_is_local(xe))
3114                        credits += ocfs2_remove_extent_credits(inode->i_sb);
3115
3116                goto out;
3117        }
3118
3119        /* do cluster allocation guess first. */
3120        value_size = le64_to_cpu(xe->xe_value_size);
3121
3122        if (old_in_xb) {
3123                /*
3124                 * In xattr set, we always try to set the xe in inode first,
3125                 * so if it can be inserted into inode successfully, the old
3126                 * one will be removed from the xattr block, and this xattr
3127                 * will be inserted into inode as a new xattr in inode.
3128                 */
3129                if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3130                        clusters_add += new_clusters;
3131                        credits += ocfs2_remove_extent_credits(inode->i_sb) +
3132                                    OCFS2_INODE_UPDATE_CREDITS;
3133                        if (!ocfs2_xattr_is_local(xe))
3134                                credits += ocfs2_calc_extend_credits(
3135                                                        inode->i_sb,
3136                                                        &def_xv.xv.xr_list);
3137                        goto out;
3138                }
3139        }
3140
3141        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3142                /* the new values will be stored outside. */
3143                u32 old_clusters = 0;
3144
3145                if (!ocfs2_xattr_is_local(xe)) {
3146                        old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3147                                                                 value_size);
3148                        xv = (struct ocfs2_xattr_value_root *)
3149                             (base + name_offset + name_len);
3150                        value_size = OCFS2_XATTR_ROOT_SIZE;
3151                } else
3152                        xv = &def_xv.xv;
3153
3154                if (old_clusters >= new_clusters) {
3155                        credits += ocfs2_remove_extent_credits(inode->i_sb);
3156                        goto out;
3157                } else {
3158                        meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3159                        clusters_add += new_clusters - old_clusters;
3160                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3161                                                             &xv->xr_list);
3162                        if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3163                                goto out;
3164                }
3165        } else {
3166                /*
3167                 * Now the new value will be stored inside. So if the new
3168                 * value is smaller than the size of value root or the old
3169                 * value, we don't need any allocation, otherwise we have
3170                 * to guess metadata allocation.
3171                 */
3172                if ((ocfs2_xattr_is_local(xe) &&
3173                     (value_size >= xi->xi_value_len)) ||
3174                    (!ocfs2_xattr_is_local(xe) &&
3175                     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3176                        goto out;
3177        }
3178
3179meta_guess:
3180        /* calculate metadata allocation. */
3181        if (di->i_xattr_loc) {
3182                if (!xbs->xattr_bh) {
3183                        ret = ocfs2_read_xattr_block(inode,
3184                                                     le64_to_cpu(di->i_xattr_loc),
3185                                                     &bh);
3186                        if (ret) {
3187                                mlog_errno(ret);
3188                                goto out;
3189                        }
3190
3191                        xb = (struct ocfs2_xattr_block *)bh->b_data;
3192                } else
3193                        xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3194
3195                /*
3196                 * If there is already an xattr tree, good, we can calculate
3197                 * like other b-trees. Otherwise we may have the chance of
3198                 * create a tree, the credit calculation is borrowed from
3199                 * ocfs2_calc_extend_credits with root_el = NULL. And the
3200                 * new tree will be cluster based, so no meta is needed.
3201                 */
3202                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3203                        struct ocfs2_extent_list *el =
3204                                 &xb->xb_attrs.xb_root.xt_list;
3205                        meta_add += ocfs2_extend_meta_needed(el);
3206                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3207                                                             el);
3208                } else
3209                        credits += OCFS2_SUBALLOC_ALLOC + 1;
3210
3211                /*
3212                 * This cluster will be used either for new bucket or for
3213                 * new xattr block.
3214                 * If the cluster size is the same as the bucket size, one
3215                 * more is needed since we may need to extend the bucket
3216                 * also.
3217                 */
3218                clusters_add += 1;
3219                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3220                if (OCFS2_XATTR_BUCKET_SIZE ==
3221                        OCFS2_SB(inode->i_sb)->s_clustersize) {
3222                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3223                        clusters_add += 1;
3224                }
3225        } else {
3226                credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3227                if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3228                        struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
3229                        meta_add += ocfs2_extend_meta_needed(el);
3230                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3231                                                             el);
3232                } else {
3233                        meta_add += 1;
3234                }
3235        }
3236out:
3237        if (clusters_need)
3238                *clusters_need = clusters_add;
3239        if (meta_need)
3240                *meta_need = meta_add;
3241        if (credits_need)
3242                *credits_need = credits;
3243        brelse(bh);
3244        return ret;
3245}
3246
3247static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3248                                     struct ocfs2_dinode *di,
3249                                     struct ocfs2_xattr_info *xi,
3250                                     struct ocfs2_xattr_search *xis,
3251                                     struct ocfs2_xattr_search *xbs,
3252                                     struct ocfs2_xattr_set_ctxt *ctxt,
3253                                     int extra_meta,
3254                                     int *credits)
3255{
3256        int clusters_add, meta_add, ret;
3257        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3258
3259        memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3260
3261        ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3262
3263        ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3264                                        &clusters_add, &meta_add, credits);
3265        if (ret) {
3266                mlog_errno(ret);
3267                return ret;
3268        }
3269
3270        meta_add += extra_meta;
3271        trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3272                                        clusters_add, *credits);
3273
3274        if (meta_add) {
3275                ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3276                                                        &ctxt->meta_ac);
3277                if (ret) {
3278                        mlog_errno(ret);
3279                        goto out;
3280                }
3281        }
3282
3283        if (clusters_add) {
3284                ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3285                if (ret)
3286                        mlog_errno(ret);
3287        }
3288out:
3289        if (ret) {
3290                if (ctxt->meta_ac) {
3291                        ocfs2_free_alloc_context(ctxt->meta_ac);
3292                        ctxt->meta_ac = NULL;
3293                }
3294
3295                /*
3296                 * We cannot have an error and a non null ctxt->data_ac.
3297                 */
3298        }
3299
3300        return ret;
3301}
3302
3303static int __ocfs2_xattr_set_handle(struct inode *inode,
3304                                    struct ocfs2_dinode *di,
3305                                    struct ocfs2_xattr_info *xi,
3306                                    struct ocfs2_xattr_search *xis,
3307                                    struct ocfs2_xattr_search *xbs,
3308                                    struct ocfs2_xattr_set_ctxt *ctxt)
3309{
3310        int ret = 0, credits, old_found;
3311
3312        if (!xi->xi_value) {
3313                /* Remove existing extended attribute */
3314                if (!xis->not_found)
3315                        ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3316                else if (!xbs->not_found)
3317                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3318        } else {
3319                /* We always try to set extended attribute into inode first*/
3320                ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3321                if (!ret && !xbs->not_found) {
3322                        /*
3323                         * If succeed and that extended attribute existing in
3324                         * external block, then we will remove it.
3325                         */
3326                        xi->xi_value = NULL;
3327                        xi->xi_value_len = 0;
3328
3329                        old_found = xis->not_found;
3330                        xis->not_found = -ENODATA;
3331                        ret = ocfs2_calc_xattr_set_need(inode,
3332                                                        di,
3333                                                        xi,
3334                                                        xis,
3335                                                        xbs,
3336                                                        NULL,
3337                                                        NULL,
3338                                                        &credits);
3339                        xis->not_found = old_found;
3340                        if (ret) {
3341                                mlog_errno(ret);
3342                                goto out;
3343                        }
3344
3345                        ret = ocfs2_extend_trans(ctxt->handle, credits);
3346                        if (ret) {
3347                                mlog_errno(ret);
3348                                goto out;
3349                        }
3350                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3351                } else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3352                        if (di->i_xattr_loc && !xbs->xattr_bh) {
3353                                ret = ocfs2_xattr_block_find(inode,
3354                                                             xi->xi_name_index,
3355                                                             xi->xi_name, xbs);
3356                                if (ret)
3357                                        goto out;
3358
3359                                old_found = xis->not_found;
3360                                xis->not_found = -ENODATA;
3361                                ret = ocfs2_calc_xattr_set_need(inode,
3362                                                                di,
3363                                                                xi,
3364                                                                xis,
3365                                                                xbs,
3366                                                                NULL,
3367                                                                NULL,
3368                                                                &credits);
3369                                xis->not_found = old_found;
3370                                if (ret) {
3371                                        mlog_errno(ret);
3372                                        goto out;
3373                                }
3374
3375                                ret = ocfs2_extend_trans(ctxt->handle, credits);
3376                                if (ret) {
3377                                        mlog_errno(ret);
3378                                        goto out;
3379                                }
3380                        }
3381                        /*
3382                         * If no space in inode, we will set extended attribute
3383                         * into external block.
3384                         */
3385                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3386                        if (ret)
3387                                goto out;
3388                        if (!xis->not_found) {
3389                                /*
3390                                 * If succeed and that extended attribute
3391                                 * existing in inode, we will remove it.
3392                                 */
3393                                xi->xi_value = NULL;
3394                                xi->xi_value_len = 0;
3395                                xbs->not_found = -ENODATA;
3396                                ret = ocfs2_calc_xattr_set_need(inode,
3397                                                                di,
3398                                                                xi,
3399                                                                xis,
3400                                                                xbs,
3401                                                                NULL,
3402                                                                NULL,
3403                                                                &credits);
3404                                if (ret) {
3405                                        mlog_errno(ret);
3406                                        goto out;
3407                                }
3408
3409                                ret = ocfs2_extend_trans(ctxt->handle, credits);
3410                                if (ret) {
3411                                        mlog_errno(ret);
3412                                        goto out;
3413                                }
3414                                ret = ocfs2_xattr_ibody_set(inode, xi,
3415                                                            xis, ctxt);
3416                        }
3417                }
3418        }
3419
3420        if (!ret) {
3421                /* Update inode ctime. */
3422                ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3423                                              xis->inode_bh,
3424                                              OCFS2_JOURNAL_ACCESS_WRITE);
3425                if (ret) {
3426                        mlog_errno(ret);
3427                        goto out;
3428                }
3429
3430                inode->i_ctime = current_time(inode);
3431                di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3432                di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3433                ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3434        }
3435out:
3436        return ret;
3437}
3438
3439/*
3440 * This function only called duing creating inode
3441 * for init security/acl xattrs of the new inode.
3442 * All transanction credits have been reserved in mknod.
3443 */
3444int ocfs2_xattr_set_handle(handle_t *handle,
3445                           struct inode *inode,
3446                           struct buffer_head *di_bh,
3447                           int name_index,
3448                           const char *name,
3449                           const void *value,
3450                           size_t value_len,
3451                           int flags,
3452                           struct ocfs2_alloc_context *meta_ac,
3453                           struct ocfs2_alloc_context *data_ac)
3454{
3455        struct ocfs2_dinode *di;
3456        int ret;
3457
3458        struct ocfs2_xattr_info xi = {
3459                .xi_name_index = name_index,
3460                .xi_name = name,
3461                .xi_name_len = strlen(name),
3462                .xi_value = value,
3463                .xi_value_len = value_len,
3464        };
3465
3466        struct ocfs2_xattr_search xis = {
3467                .not_found = -ENODATA,
3468        };
3469
3470        struct ocfs2_xattr_search xbs = {
3471                .not_found = -ENODATA,
3472        };
3473
3474        struct ocfs2_xattr_set_ctxt ctxt = {
3475                .handle = handle,
3476                .meta_ac = meta_ac,
3477                .data_ac = data_ac,
3478        };
3479
3480        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3481                return -EOPNOTSUPP;
3482
3483        /*
3484         * In extreme situation, may need xattr bucket when
3485         * block size is too small. And we have already reserved
3486         * the credits for bucket in mknod.
3487         */
3488        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3489                xbs.bucket = ocfs2_xattr_bucket_new(inode);
3490                if (!xbs.bucket) {
3491                        mlog_errno(-ENOMEM);
3492                        return -ENOMEM;
3493                }
3494        }
3495
3496        xis.inode_bh = xbs.inode_bh = di_bh;
3497        di = (struct ocfs2_dinode *)di_bh->b_data;
3498
3499        down_write(&OCFS2_I(inode)->ip_xattr_sem);
3500
3501        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3502        if (ret)
3503                goto cleanup;
3504        if (xis.not_found) {
3505                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3506                if (ret)
3507                        goto cleanup;
3508        }
3509
3510        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3511
3512cleanup:
3513        up_write(&OCFS2_I(inode)->ip_xattr_sem);
3514        brelse(xbs.xattr_bh);
3515        ocfs2_xattr_bucket_free(xbs.bucket);
3516
3517        return ret;
3518}
3519
3520/*
3521 * ocfs2_xattr_set()
3522 *
3523 * Set, replace or remove an extended attribute for this inode.
3524 * value is NULL to remove an existing extended attribute, else either
3525 * create or replace an extended attribute.
3526 */
3527int ocfs2_xattr_set(struct inode *inode,
3528                    int name_index,
3529                    const char *name,
3530                    const void *value,
3531                    size_t value_len,
3532                    int flags)
3533{
3534        struct buffer_head *di_bh = NULL;
3535        struct ocfs2_dinode *di;
3536        int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
3537        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3538        struct inode *tl_inode = osb->osb_tl_inode;
3539        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
3540        struct ocfs2_refcount_tree *ref_tree = NULL;
3541        struct ocfs2_lock_holder oh;
3542
3543        struct ocfs2_xattr_info xi = {
3544                .xi_name_index = name_index,
3545                .xi_name = name,
3546                .xi_name_len = strlen(name),
3547                .xi_value = value,
3548                .xi_value_len = value_len,
3549        };
3550
3551        struct ocfs2_xattr_search xis = {
3552                .not_found = -ENODATA,
3553        };
3554
3555        struct ocfs2_xattr_search xbs = {
3556                .not_found = -ENODATA,
3557        };
3558
3559        if (!ocfs2_supports_xattr(osb))
3560                return -EOPNOTSUPP;
3561
3562        /*
3563         * Only xbs will be used on indexed trees.  xis doesn't need a
3564         * bucket.
3565         */
3566        xbs.bucket = ocfs2_xattr_bucket_new(inode);
3567        if (!xbs.bucket) {
3568                mlog_errno(-ENOMEM);
3569                return -ENOMEM;
3570        }
3571
3572        had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
3573        if (had_lock < 0) {
3574                ret = had_lock;
3575                mlog_errno(ret);
3576                goto cleanup_nolock;
3577        }
3578        xis.inode_bh = xbs.inode_bh = di_bh;
3579        di = (struct ocfs2_dinode *)di_bh->b_data;
3580
3581        down_write(&OCFS2_I(inode)->ip_xattr_sem);
3582        /*
3583         * Scan inode and external block to find the same name
3584         * extended attribute and collect search information.
3585         */
3586        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3587        if (ret)
3588                goto cleanup;
3589        if (xis.not_found) {
3590                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3591                if (ret)
3592                        goto cleanup;
3593        }
3594
3595        if (xis.not_found && xbs.not_found) {
3596                ret = -ENODATA;
3597                if (flags & XATTR_REPLACE)
3598                        goto cleanup;
3599                ret = 0;
3600                if (!value)
3601                        goto cleanup;
3602        } else {
3603                ret = -EEXIST;
3604                if (flags & XATTR_CREATE)
3605                        goto cleanup;
3606        }
3607
3608        /* Check whether the value is refcounted and do some preparation. */
3609        if (ocfs2_is_refcount_inode(inode) &&
3610            (!xis.not_found || !xbs.not_found)) {
3611                ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3612                                                   &xis, &xbs, &ref_tree,
3613                                                   &ref_meta, &ref_credits);
3614                if (ret) {
3615                        mlog_errno(ret);
3616                        goto cleanup;
3617                }
3618        }
3619
3620        inode_lock(tl_inode);
3621
3622        if (ocfs2_truncate_log_needs_flush(osb)) {
3623                ret = __ocfs2_flush_truncate_log(osb);
3624                if (ret < 0) {
3625                        inode_unlock(tl_inode);
3626                        mlog_errno(ret);
3627                        goto cleanup;
3628                }
3629        }
3630        inode_unlock(tl_inode);
3631
3632        ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3633                                        &xbs, &ctxt, ref_meta, &credits);
3634        if (ret) {
3635                mlog_errno(ret);
3636                goto cleanup;
3637        }
3638
3639        /* we need to update inode's ctime field, so add credit for it. */
3640        credits += OCFS2_INODE_UPDATE_CREDITS;
3641        ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3642        if (IS_ERR(ctxt.handle)) {
3643                ret = PTR_ERR(ctxt.handle);
3644                mlog_errno(ret);
3645                goto out_free_ac;
3646        }
3647
3648        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3649        ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
3650
3651        ocfs2_commit_trans(osb, ctxt.handle);
3652
3653out_free_ac:
3654        if (ctxt.data_ac)
3655                ocfs2_free_alloc_context(ctxt.data_ac);
3656        if (ctxt.meta_ac)
3657                ocfs2_free_alloc_context(ctxt.meta_ac);
3658        if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3659                ocfs2_schedule_truncate_log_flush(osb, 1);
3660        ocfs2_run_deallocs(osb, &ctxt.dealloc);
3661
3662cleanup:
3663        if (ref_tree)
3664                ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3665        up_write(&OCFS2_I(inode)->ip_xattr_sem);
3666        if (!value && !ret) {
3667                ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3668                if (ret)
3669                        mlog_errno(ret);
3670        }
3671        ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
3672cleanup_nolock:
3673        brelse(di_bh);
3674        brelse(xbs.xattr_bh);
3675        ocfs2_xattr_bucket_free(xbs.bucket);
3676
3677        return ret;
3678}
3679
3680/*
3681 * Find the xattr extent rec which may contains name_hash.
3682 * e_cpos will be the first name hash of the xattr rec.
3683 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3684 */
3685static int ocfs2_xattr_get_rec(struct inode *inode,
3686                               u32 name_hash,
3687                               u64 *p_blkno,
3688                               u32 *e_cpos,
3689                               u32 *num_clusters,
3690                               struct ocfs2_extent_list *el)
3691{
3692        int ret = 0, i;
3693        struct buffer_head *eb_bh = NULL;
3694        struct ocfs2_extent_block *eb;
3695        struct ocfs2_extent_rec *rec = NULL;
3696        u64 e_blkno = 0;
3697
3698        if (el->l_tree_depth) {
3699                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3700                                      &eb_bh);
3701                if (ret) {
3702                        mlog_errno(ret);
3703                        goto out;
3704                }
3705
3706                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3707                el = &eb->h_list;
3708
3709                if (el->l_tree_depth) {
3710                        ret = ocfs2_error(inode->i_sb,
3711                                          "Inode %lu has non zero tree depth in xattr tree block %llu\n",
3712                                          inode->i_ino,
3713                                          (unsigned long long)eb_bh->b_blocknr);
3714                        goto out;
3715                }
3716        }
3717
3718        for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3719                rec = &el->l_recs[i];
3720
3721                if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3722                        e_blkno = le64_to_cpu(rec->e_blkno);
3723                        break;
3724                }
3725        }
3726
3727        if (!e_blkno) {
3728                ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
3729                                  inode->i_ino,
3730                                  le32_to_cpu(rec->e_cpos),
3731                                  ocfs2_rec_clusters(el, rec));
3732                goto out;
3733        }
3734
3735        *p_blkno = le64_to_cpu(rec->e_blkno);
3736        *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3737        if (e_cpos)
3738                *e_cpos = le32_to_cpu(rec->e_cpos);
3739out:
3740        brelse(eb_bh);
3741        return ret;
3742}
3743
/*
 * Signature of a per-bucket callback: it receives the inode, one xattr
 * bucket and an opaque caller-supplied argument, and returns an int
 * status.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
                                struct ocfs2_xattr_bucket *bucket, void *para);
3747
3748static int ocfs2_find_xe_in_bucket(struct inode *inode,
3749                                   struct ocfs2_xattr_bucket *bucket,
3750                                   int name_index,
3751                                   const char *name,
3752                                   u32 name_hash,
3753                                   u16 *xe_index,
3754                                   int *found)
3755{
3756        int i, ret = 0, cmp = 1, block_off, new_offset;
3757        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3758        size_t name_len = strlen(name);
3759        struct ocfs2_xattr_entry *xe = NULL;
3760        char *xe_name;
3761
3762        /*
3763         * We don't use binary search in the bucket because there
3764         * may be multiple entries with the same name hash.
3765         */
3766        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3767                xe = &xh->xh_entries[i];
3768
3769                if (name_hash > le32_to_cpu(xe->xe_name_hash))
3770                        continue;
3771                else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3772                        break;
3773
3774                cmp = name_index - ocfs2_xattr_get_type(xe);
3775                if (!cmp)
3776                        cmp = name_len - xe->xe_name_len;
3777                if (cmp)
3778                        continue;
3779
3780                ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3781                                                        xh,
3782                                                        i,
3783                                                        &block_off,
3784                                                        &new_offset);
3785                if (ret) {
3786                        mlog_errno(ret);
3787                        break;
3788                }
3789
3790
3791                xe_name = bucket_block(bucket, block_off) + new_offset;
3792                if (!memcmp(name, xe_name, name_len)) {
3793                        *xe_index = i;
3794                        *found = 1;
3795                        ret = 0;
3796                        break;
3797                }
3798        }
3799
3800        return ret;
3801}
3802
3803/*
3804 * Find the specified xattr entry in a series of buckets.
3805 * This series start from p_blkno and last for num_clusters.
3806 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3807 * the num of the valid buckets.
3808 *
3809 * Return the buffer_head this xattr should reside in. And if the xattr's
3810 * hash is in the gap of 2 buckets, return the lower bucket.
3811 */
3812static int ocfs2_xattr_bucket_find(struct inode *inode,
3813                                   int name_index,
3814                                   const char *name,
3815                                   u32 name_hash,
3816                                   u64 p_blkno,
3817                                   u32 first_hash,
3818                                   u32 num_clusters,
3819                                   struct ocfs2_xattr_search *xs)
3820{
3821        int ret, found = 0;
3822        struct ocfs2_xattr_header *xh = NULL;
3823        struct ocfs2_xattr_entry *xe = NULL;
3824        u16 index = 0;
3825        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3826        int low_bucket = 0, bucket, high_bucket;
3827        struct ocfs2_xattr_bucket *search;
3828        u64 blkno, lower_blkno = 0;
3829
3830        search = ocfs2_xattr_bucket_new(inode);
3831        if (!search) {
3832                ret = -ENOMEM;
3833                mlog_errno(ret);
3834                goto out;
3835        }
3836
3837        ret = ocfs2_read_xattr_bucket(search, p_blkno);
3838        if (ret) {
3839                mlog_errno(ret);
3840                goto out;
3841        }
3842
3843        xh = bucket_xh(search);
3844        high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3845        while (low_bucket <= high_bucket) {
3846                ocfs2_xattr_bucket_relse(search);
3847
3848                bucket = (low_bucket + high_bucket) / 2;
3849                blkno = p_blkno + bucket * blk_per_bucket;
3850                ret = ocfs2_read_xattr_bucket(search, blkno);
3851                if (ret) {
3852                        mlog_errno(ret);
3853                        goto out;
3854                }
3855
3856                xh = bucket_xh(search);
3857                xe = &xh->xh_entries[0];
3858                if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3859                        high_bucket = bucket - 1;
3860                        continue;
3861                }
3862
3863                /*
3864                 * Check whether the hash of the last entry in our
3865                 * bucket is larger than the search one. for an empty
3866                 * bucket, the last one is also the first one.
3867                 */
3868                if (xh->xh_count)
3869                        xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3870
3871                /* record lower_blkno which may be the insert place. */
3872                lower_blkno = blkno;
3873
3874                if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3875                        low_bucket = bucket + 1;
3876                        continue;
3877                }
3878
3879                /* the searched xattr should reside in this bucket if exists. */
3880                ret = ocfs2_find_xe_in_bucket(inode, search,
3881                                              name_index, name, name_hash,
3882                                              &index, &found);
3883                if (ret) {
3884                        mlog_errno(ret);
3885                        goto out;
3886                }
3887                break;
3888        }
3889
3890        /*
3891         * Record the bucket we have found.
3892         * When the xattr's hash value is in the gap of 2 buckets, we will
3893         * always set it to the previous bucket.
3894         */
3895        if (!lower_blkno)
3896                lower_blkno = p_blkno;
3897
3898        /* This should be in cache - we just read it during the search */
3899        ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3900        if (ret) {
3901                mlog_errno(ret);
3902                goto out;
3903        }
3904
3905        xs->header = bucket_xh(xs->bucket);
3906        xs->base = bucket_block(xs->bucket, 0);
3907        xs->end = xs->base + inode->i_sb->s_blocksize;
3908
3909        if (found) {
3910                xs->here = &xs->header->xh_entries[index];
3911                trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3912                        name, name_index, name_hash,
3913                        (unsigned long long)bucket_blkno(xs->bucket),
3914                        index);
3915        } else
3916                ret = -ENODATA;
3917
3918out:
3919        ocfs2_xattr_bucket_free(search);
3920        return ret;
3921}
3922
3923static int ocfs2_xattr_index_block_find(struct inode *inode,
3924                                        struct buffer_head *root_bh,
3925                                        int name_index,
3926                                        const char *name,
3927                                        struct ocfs2_xattr_search *xs)
3928{
3929        int ret;
3930        struct ocfs2_xattr_block *xb =
3931                        (struct ocfs2_xattr_block *)root_bh->b_data;
3932        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3933        struct ocfs2_extent_list *el = &xb_root->xt_list;
3934        u64 p_blkno = 0;
3935        u32 first_hash, num_clusters = 0;
3936        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3937
3938        if (le16_to_cpu(el->l_next_free_rec) == 0)
3939                return -ENODATA;
3940
3941        trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3942                                        name, name_index, name_hash,
3943                                        (unsigned long long)root_bh->b_blocknr,
3944                                        -1);
3945
3946        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3947                                  &num_clusters, el);
3948        if (ret) {
3949                mlog_errno(ret);
3950                goto out;
3951        }
3952
3953        BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3954
3955        trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3956                                        name, name_index, first_hash,
3957                                        (unsigned long long)p_blkno,
3958                                        num_clusters);
3959
3960        ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3961                                      p_blkno, first_hash, num_clusters, xs);
3962
3963out:
3964        return ret;
3965}
3966
3967static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3968                                       u64 blkno,
3969                                       u32 clusters,
3970                                       xattr_bucket_func *func,
3971                                       void *para)
3972{
3973        int i, ret = 0;
3974        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3975        u32 num_buckets = clusters * bpc;
3976        struct ocfs2_xattr_bucket *bucket;
3977
3978        bucket = ocfs2_xattr_bucket_new(inode);
3979        if (!bucket) {
3980                mlog_errno(-ENOMEM);
3981                return -ENOMEM;
3982        }
3983
3984        trace_ocfs2_iterate_xattr_buckets(
3985                (unsigned long long)OCFS2_I(inode)->ip_blkno,
3986                (unsigned long long)blkno, clusters);
3987
3988        for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3989                ret = ocfs2_read_xattr_bucket(bucket, blkno);
3990                if (ret) {
3991                        mlog_errno(ret);
3992                        break;
3993                }
3994
3995                /*
3996                 * The real bucket num in this series of blocks is stored
3997                 * in the 1st bucket.
3998                 */
3999                if (i == 0)
4000                        num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
4001
4002                trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
4003                     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
4004                if (func) {
4005                        ret = func(inode, bucket, para);
4006                        if (ret && ret != -ERANGE)
4007                                mlog_errno(ret);
4008                        /* Fall through to bucket_relse() */
4009                }
4010
4011                ocfs2_xattr_bucket_relse(bucket);
4012                if (ret)
4013                        break;
4014        }
4015
4016        ocfs2_xattr_bucket_free(bucket);
4017        return ret;
4018}
4019
/* State threaded through the bucket iteration when listing xattr names. */
struct ocfs2_xattr_tree_list {
	/* Caller-supplied output buffer handed to ocfs2_xattr_list_entry(). */
	char *buffer;
	/* Size of buffer, passed along with it. */
	size_t buffer_size;
	/* Running result updated by ocfs2_xattr_list_entry(). */
	size_t result;
};
4025
4026static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
4027                                             struct ocfs2_xattr_header *xh,
4028                                             int index,
4029                                             int *block_off,
4030                                             int *new_offset)
4031{
4032        u16 name_offset;
4033
4034        if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4035                return -EINVAL;
4036
4037        name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4038
4039        *block_off = name_offset >> sb->s_blocksize_bits;
4040        *new_offset = name_offset % sb->s_blocksize;
4041
4042        return 0;
4043}
4044
4045static int ocfs2_list_xattr_bucket(struct inode *inode,
4046                                   struct ocfs2_xattr_bucket *bucket,
4047                                   void *para)
4048{
4049        int ret = 0, type;
4050        struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4051        int i, block_off, new_offset;
4052        const char *name;
4053
4054        for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4055                struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4056                type = ocfs2_xattr_get_type(entry);
4057
4058                ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4059                                                        bucket_xh(bucket),
4060                                                        i,
4061                                                        &block_off,
4062                                                        &new_offset);
4063                if (ret)
4064                        break;
4065
4066                name = (const char *)bucket_block(bucket, block_off) +
4067                        new_offset;
4068                ret = ocfs2_xattr_list_entry(inode->i_sb,
4069                                             xl->buffer,
4070                                             xl->buffer_size,
4071                                             &xl->result,
4072                                             type, name,
4073                                             entry->xe_name_len);
4074                if (ret)
4075                        break;
4076        }
4077
4078        return ret;
4079}
4080
4081static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4082                                           struct buffer_head *blk_bh,
4083                                           xattr_tree_rec_func *rec_func,
4084                                           void *para)
4085{
4086        struct ocfs2_xattr_block *xb =
4087                        (struct ocfs2_xattr_block *)blk_bh->b_data;
4088        struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4089        int ret = 0;
4090        u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4091        u64 p_blkno = 0;
4092
4093        if (!el->l_next_free_rec || !rec_func)
4094                return 0;
4095
4096        while (name_hash > 0) {
4097                ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4098                                          &e_cpos, &num_clusters, el);
4099                if (ret) {
4100                        mlog_errno(ret);
4101                        break;
4102                }
4103
4104                ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4105                               num_clusters, para);
4106                if (ret) {
4107                        if (ret != -ERANGE)
4108                                mlog_errno(ret);
4109                        break;
4110                }
4111
4112                if (e_cpos == 0)
4113                        break;
4114
4115                name_hash = e_cpos - 1;
4116        }
4117
4118        return ret;
4119
4120}
4121
4122static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4123                                     struct buffer_head *root_bh,
4124                                     u64 blkno, u32 cpos, u32 len, void *para)
4125{
4126        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4127                                           ocfs2_list_xattr_bucket, para);
4128}
4129
4130static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4131                                             struct buffer_head *blk_bh,
4132                                             char *buffer,
4133                                             size_t buffer_size)
4134{
4135        int ret;
4136        struct ocfs2_xattr_tree_list xl = {
4137                .buffer = buffer,
4138                .buffer_size = buffer_size,
4139                .result = 0,
4140        };
4141
4142        ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4143                                              ocfs2_list_xattr_tree_rec, &xl);
4144        if (ret) {
4145                mlog_errno(ret);
4146                goto out;
4147        }
4148
4149        ret = xl.result;
4150out:
4151        return ret;
4152}
4153
4154static int cmp_xe(const void *a, const void *b)
4155{
4156        const struct ocfs2_xattr_entry *l = a, *r = b;
4157        u32 l_hash = le32_to_cpu(l->xe_name_hash);
4158        u32 r_hash = le32_to_cpu(r->xe_name_hash);
4159
4160        if (l_hash > r_hash)
4161                return 1;
4162        if (l_hash < r_hash)
4163                return -1;
4164        return 0;
4165}
4166
4167static void swap_xe(void *a, void *b, int size)
4168{
4169        struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4170
4171        tmp = *l;
4172        memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4173        memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4174}
4175
4176/*
4177 * When the ocfs2_xattr_block is filled up, new bucket will be created
4178 * and all the xattr entries will be moved to the new bucket.
4179 * The header goes at the start of the bucket, and the names+values are
4180 * filled from the end.  This is why *target starts as the last buffer.
4181 * Note: we need to sort the entries since they are not saved in order
4182 * in the ocfs2_xattr_block.
4183 */
4184static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4185                                           struct buffer_head *xb_bh,
4186                                           struct ocfs2_xattr_bucket *bucket)
4187{
4188        int i, blocksize = inode->i_sb->s_blocksize;
4189        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4190        u16 offset, size, off_change;
4191        struct ocfs2_xattr_entry *xe;
4192        struct ocfs2_xattr_block *xb =
4193                                (struct ocfs2_xattr_block *)xb_bh->b_data;
4194        struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4195        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4196        u16 count = le16_to_cpu(xb_xh->xh_count);
4197        char *src = xb_bh->b_data;
4198        char *target = bucket_block(bucket, blks - 1);
4199
4200        trace_ocfs2_cp_xattr_block_to_bucket_begin(
4201                                (unsigned long long)xb_bh->b_blocknr,
4202                                (unsigned long long)bucket_blkno(bucket));
4203
4204        for (i = 0; i < blks; i++)
4205                memset(bucket_block(bucket, i), 0, blocksize);
4206
4207        /*
4208         * Since the xe_name_offset is based on ocfs2_xattr_header,
4209         * there is a offset change corresponding to the change of
4210         * ocfs2_xattr_header's position.
4211         */
4212        off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4213        xe = &xb_xh->xh_entries[count - 1];
4214        offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4215        size = blocksize - offset;
4216
4217        /* copy all the names and values. */
4218        memcpy(target + offset, src + offset, size);
4219
4220        /* Init new header now. */
4221        xh->xh_count = xb_xh->xh_count;
4222        xh->xh_num_buckets = cpu_to_le16(1);
4223        xh->xh_name_value_len = cpu_to_le16(size);
4224        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4225
4226        /* copy all the entries. */
4227        target = bucket_block(bucket, 0);
4228        offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4229        size = count * sizeof(struct ocfs2_xattr_entry);
4230        memcpy(target + offset, (char *)xb_xh + offset, size);
4231
4232        /* Change the xe offset for all the xe because of the move. */
4233        off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4234                 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4235        for (i = 0; i < count; i++)
4236                le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4237
4238        trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4239
4240        sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4241             cmp_xe, swap_xe);
4242}
4243
4244/*
4245 * After we move xattr from block to index btree, we have to
4246 * update ocfs2_xattr_search to the new xe and base.
4247 *
4248 * When the entry is in xattr block, xattr_bh indicates the storage place.
4249 * While if the entry is in index b-tree, "bucket" indicates the
4250 * real place of the xattr.
4251 */
4252static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4253                                            struct ocfs2_xattr_search *xs,
4254                                            struct buffer_head *old_bh)
4255{
4256        char *buf = old_bh->b_data;
4257        struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4258        struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4259        int i;
4260
4261        xs->header = bucket_xh(xs->bucket);
4262        xs->base = bucket_block(xs->bucket, 0);
4263        xs->end = xs->base + inode->i_sb->s_blocksize;
4264
4265        if (xs->not_found)
4266                return;
4267
4268        i = xs->here - old_xh->xh_entries;
4269        xs->here = &xs->header->xh_entries[i];
4270}
4271
4272static int ocfs2_xattr_create_index_block(struct inode *inode,
4273                                          struct ocfs2_xattr_search *xs,
4274                                          struct ocfs2_xattr_set_ctxt *ctxt)
4275{
4276        int ret;
4277        u32 bit_off, len;
4278        u64 blkno;
4279        handle_t *handle = ctxt->handle;
4280        struct ocfs2_inode_info *oi = OCFS2_I(inode);
4281        struct buffer_head *xb_bh = xs->xattr_bh;
4282        struct ocfs2_xattr_block *xb =
4283                        (struct ocfs2_xattr_block *)xb_bh->b_data;
4284        struct ocfs2_xattr_tree_root *xr;
4285        u16 xb_flags = le16_to_cpu(xb->xb_flags);
4286
4287        trace_ocfs2_xattr_create_index_block_begin(
4288                                (unsigned long long)xb_bh->b_blocknr);
4289
4290        BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4291        BUG_ON(!xs->bucket);
4292
4293        /*
4294         * XXX:
4295         * We can use this lock for now, and maybe move to a dedicated mutex
4296         * if performance becomes a problem later.
4297         */
4298        down_write(&oi->ip_alloc_sem);
4299
4300        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4301                                      OCFS2_JOURNAL_ACCESS_WRITE);
4302        if (ret) {
4303                mlog_errno(ret);
4304                goto out;
4305        }
4306
4307        ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4308                                     1, 1, &bit_off, &len);
4309        if (ret) {
4310                mlog_errno(ret);
4311                goto out;
4312        }
4313
4314        /*
4315         * The bucket may spread in many blocks, and
4316         * we will only touch the 1st block and the last block
4317         * in the whole bucket(one for entry and one for data).
4318         */
4319        blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4320
4321        trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4322
4323        ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
4324        if (ret) {
4325                mlog_errno(ret);
4326                goto out;
4327        }
4328
4329        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4330                                                OCFS2_JOURNAL_ACCESS_CREATE);
4331        if (ret) {
4332                mlog_errno(ret);
4333                goto out;
4334        }
4335
4336        ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4337        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4338
4339        ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4340
4341        /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4342        memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4343               offsetof(struct ocfs2_xattr_block, xb_attrs));
4344
4345        xr = &xb->xb_attrs.xb_root;
4346        xr->xt_clusters = cpu_to_le32(1);
4347        xr->xt_last_eb_blk = 0;
4348        xr->xt_list.l_tree_depth = 0;
4349        xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4350        xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4351
4352        xr->xt_list.l_recs[0].e_cpos = 0;
4353        xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4354        xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4355
4356        xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4357
4358        ocfs2_journal_dirty(handle, xb_bh);
4359
4360out:
4361        up_write(&oi->ip_alloc_sem);
4362
4363        return ret;
4364}
4365
4366static int cmp_xe_offset(const void *a, const void *b)
4367{
4368        const struct ocfs2_xattr_entry *l = a, *r = b;
4369        u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4370        u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4371
4372        if (l_name_offset < r_name_offset)
4373                return 1;
4374        if (l_name_offset > r_name_offset)
4375                return -1;
4376        return 0;
4377}
4378
4379/*
4380 * defrag a xattr bucket if we find that the bucket has some
4381 * holes beteen name/value pairs.
4382 * We will move all the name/value pairs to the end of the bucket
4383 * so that we can spare some space for insertion.
4384 */
4385static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4386                                     handle_t *handle,
4387                                     struct ocfs2_xattr_bucket *bucket)
4388{
4389        int ret, i;
4390        size_t end, offset, len;
4391        struct ocfs2_xattr_header *xh;
4392        char *entries, *buf, *bucket_buf = NULL;
4393        u64 blkno = bucket_blkno(bucket);
4394        u16 xh_free_start;
4395        size_t blocksize = inode->i_sb->s_blocksize;
4396        struct ocfs2_xattr_entry *xe;
4397
4398        /*
4399         * In order to make the operation more efficient and generic,
4400         * we copy all the blocks into a contiguous memory and do the
4401         * defragment there, so if anything is error, we will not touch
4402         * the real block.
4403         */
4404        bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4405        if (!bucket_buf) {
4406                ret = -EIO;
4407                goto out;
4408        }
4409
4410        buf = bucket_buf;
4411        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4412                memcpy(buf, bucket_block(bucket, i), blocksize);
4413
4414        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4415                                                OCFS2_JOURNAL_ACCESS_WRITE);
4416        if (ret < 0) {
4417                mlog_errno(ret);
4418                goto out;
4419        }
4420
4421        xh = (struct ocfs2_xattr_header *)bucket_buf;
4422        entries = (char *)xh->xh_entries;
4423        xh_free_start = le16_to_cpu(xh->xh_free_start);
4424
4425        trace_ocfs2_defrag_xattr_bucket(
4426             (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4427             xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4428
4429        /*
4430         * sort all the entries by their offset.
4431         * the largest will be the first, so that we can
4432         * move them to the end one by one.
4433         */
4434        sort(entries, le16_to_cpu(xh->xh_count),
4435             sizeof(struct ocfs2_xattr_entry),
4436             cmp_xe_offset, swap_xe);
4437
4438        /* Move all name/values to the end of the bucket. */
4439        xe = xh->xh_entries;
4440        end = OCFS2_XATTR_BUCKET_SIZE;
4441        for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4442                offset = le16_to_cpu(xe->xe_name_offset);
4443                len = namevalue_size_xe(xe);
4444
4445                /*
4446                 * We must make sure that the name/value pair
4447                 * exist in the same block. So adjust end to
4448                 * the previous block end if needed.
4449                 */
4450                if (((end - len) / blocksize !=
4451                        (end - 1) / blocksize))
4452                        end = end - end % blocksize;
4453
4454                if (end > offset + len) {
4455                        memmove(bucket_buf + end - len,
4456                                bucket_buf + offset, len);
4457                        xe->xe_name_offset = cpu_to_le16(end - len);
4458                }
4459
4460                mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4461                                "bucket %llu\n", (unsigned long long)blkno);
4462
4463                end -= len;
4464        }
4465
4466        mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4467                        "bucket %llu\n", (unsigned long long)blkno);
4468
4469        if (xh_free_start == end)
4470                goto out;
4471
4472        memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4473        xh->xh_free_start = cpu_to_le16(end);
4474
4475        /* sort the entries by their name_hash. */
4476        sort(entries, le16_to_cpu(xh->xh_count),
4477             sizeof(struct ocfs2_xattr_entry),
4478             cmp_xe, swap_xe);
4479
4480        buf = bucket_buf;
4481        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4482                memcpy(bucket_block(bucket, i), buf, blocksize);
4483        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4484
4485out:
4486        kfree(bucket_buf);
4487        return ret;
4488}
4489
4490/*
4491 * prev_blkno points to the start of an existing extent.  new_blkno
4492 * points to a newly allocated extent.  Because we know each of our
4493 * clusters contains more than bucket, we can easily split one cluster
4494 * at a bucket boundary.  So we take the last cluster of the existing
4495 * extent and split it down the middle.  We move the last half of the
4496 * buckets in the last cluster of the existing extent over to the new
4497 * extent.
4498 *
4499 * first_bh is the buffer at prev_blkno so we can update the existing
4500 * extent's bucket count.  header_bh is the bucket were we were hoping
4501 * to insert our xattr.  If the bucket move places the target in the new
4502 * extent, we'll update first_bh and header_bh after modifying the old
4503 * extent.
4504 *
4505 * first_hash will be set as the 1st xe's name_hash in the new extent.
4506 */
4507static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4508                                               handle_t *handle,
4509                                               struct ocfs2_xattr_bucket *first,
4510                                               struct ocfs2_xattr_bucket *target,
4511                                               u64 new_blkno,
4512                                               u32 num_clusters,
4513                                               u32 *first_hash)
4514{
4515        int ret;
4516        struct super_block *sb = inode->i_sb;
4517        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4518        int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4519        int to_move = num_buckets / 2;
4520        u64 src_blkno;
4521        u64 last_cluster_blkno = bucket_blkno(first) +
4522                ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4523
4524        BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4525        BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4526
4527        trace_ocfs2_mv_xattr_bucket_cross_cluster(
4528                                (unsigned long long)last_cluster_blkno,
4529                                (unsigned long long)new_blkno);
4530
4531        ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4532                                     last_cluster_blkno, new_blkno,
4533                                     to_move, first_hash);
4534        if (ret) {
4535                mlog_errno(ret);
4536                goto out;
4537        }
4538
4539        /* This is the first bucket that got moved */
4540        src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4541
4542        /*
4543         * If the target bucket was part of the moved buckets, we need to
4544         * update first and target.
4545         */
4546        if (bucket_blkno(target) >= src_blkno) {
4547                /* Find the block for the new target bucket */
4548                src_blkno = new_blkno +
4549                        (bucket_blkno(target) - src_blkno);
4550
4551                ocfs2_xattr_bucket_relse(first);
4552                ocfs2_xattr_bucket_relse(target);
4553
4554                /*
4555                 * These shouldn't fail - the buffers are in the
4556                 * journal from ocfs2_cp_xattr_bucket().
4557                 */
4558                ret = ocfs2_read_xattr_bucket(first, new_blkno);
4559                if (ret) {
4560                        mlog_errno(ret);
4561                        goto out;
4562                }
4563                ret = ocfs2_read_xattr_bucket(target, src_blkno);
4564                if (ret)
4565                        mlog_errno(ret);
4566
4567        }
4568
4569out:
4570        return ret;
4571}
4572
4573/*
4574 * Find the suitable pos when we divide a bucket into 2.
4575 * We have to make sure the xattrs with the same hash value exist
4576 * in the same bucket.
4577 *
4578 * If this ocfs2_xattr_header covers more than one hash value, find a
4579 * place where the hash value changes.  Try to find the most even split.
4580 * The most common case is that all entries have different hash values,
4581 * and the first check we make will find a place to split.
4582 */
4583static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4584{
4585        struct ocfs2_xattr_entry *entries = xh->xh_entries;
4586        int count = le16_to_cpu(xh->xh_count);
4587        int delta, middle = count / 2;
4588
4589        /*
4590         * We start at the middle.  Each step gets farther away in both
4591         * directions.  We therefore hit the change in hash value
4592         * nearest to the middle.  Note that this loop does not execute for
4593         * count < 2.
4594         */
4595        for (delta = 0; delta < middle; delta++) {
4596                /* Let's check delta earlier than middle */
4597                if (cmp_xe(&entries[middle - delta - 1],
4598                           &entries[middle - delta]))
4599                        return middle - delta;
4600
4601                /* For even counts, don't walk off the end */
4602                if ((middle + delta + 1) == count)
4603                        continue;
4604
4605                /* Now try delta past middle */
4606                if (cmp_xe(&entries[middle + delta],
4607                           &entries[middle + delta + 1]))
4608                        return middle + delta + 1;
4609        }
4610
4611        /* Every entry had the same hash */
4612        return count;
4613}
4614
4615/*
4616 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4617 * first_hash will record the 1st hash of the new bucket.
4618 *
4619 * Normally half of the xattrs will be moved.  But we have to make
4620 * sure that the xattrs with the same hash value are stored in the
4621 * same bucket. If all the xattrs in this bucket have the same hash
4622 * value, the new bucket will be initialized as an empty one and the
4623 * first_hash will be initialized as (hash_value+1).
4624 */
4625static int ocfs2_divide_xattr_bucket(struct inode *inode,
4626                                    handle_t *handle,
4627                                    u64 blk,
4628                                    u64 new_blk,
4629                                    u32 *first_hash,
4630                                    int new_bucket_head)
4631{
4632        int ret, i;
4633        int count, start, len, name_value_len = 0, name_offset = 0;
4634        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4635        struct ocfs2_xattr_header *xh;
4636        struct ocfs2_xattr_entry *xe;
4637        int blocksize = inode->i_sb->s_blocksize;
4638
4639        trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
4640                                              (unsigned long long)new_blk);
4641
4642        s_bucket = ocfs2_xattr_bucket_new(inode);
4643        t_bucket = ocfs2_xattr_bucket_new(inode);
4644        if (!s_bucket || !t_bucket) {
4645                ret = -ENOMEM;
4646                mlog_errno(ret);
4647                goto out;
4648        }
4649
4650        ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4651        if (ret) {
4652                mlog_errno(ret);
4653                goto out;
4654        }
4655
4656        ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4657                                                OCFS2_JOURNAL_ACCESS_WRITE);
4658        if (ret) {
4659                mlog_errno(ret);
4660                goto out;
4661        }
4662
4663        /*
4664         * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4665         * there's no need to read it.
4666         */
4667        ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
4668        if (ret) {
4669                mlog_errno(ret);
4670                goto out;
4671        }
4672
4673        /*
4674         * Hey, if we're overwriting t_bucket, what difference does
4675         * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4676         * same part of ocfs2_cp_xattr_bucket().
4677         */
4678        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4679                                                new_bucket_head ?
4680                                                OCFS2_JOURNAL_ACCESS_CREATE :
4681                                                OCFS2_JOURNAL_ACCESS_WRITE);
4682        if (ret) {
4683                mlog_errno(ret);
4684                goto out;
4685        }
4686
4687        xh = bucket_xh(s_bucket);
4688        count = le16_to_cpu(xh->xh_count);
4689        start = ocfs2_xattr_find_divide_pos(xh);
4690
4691        if (start == count) {
4692                xe = &xh->xh_entries[start-1];
4693
4694                /*
4695                 * initialized a new empty bucket here.
4696                 * The hash value is set as one larger than
4697                 * that of the last entry in the previous bucket.
4698                 */
4699                for (i = 0; i < t_bucket->bu_blocks; i++)
4700                        memset(bucket_block(t_bucket, i), 0, blocksize);
4701
4702                xh = bucket_xh(t_bucket);
4703                xh->xh_free_start = cpu_to_le16(blocksize);
4704                xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4705                le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4706
4707                goto set_num_buckets;
4708        }
4709
4710        /* copy the whole bucket to the new first. */
4711        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4712
4713        /* update the new bucket. */
4714        xh = bucket_xh(t_bucket);
4715
4716        /*
4717         * Calculate the total name/value len and xh_free_start for
4718         * the old bucket first.
4719         */
4720        name_offset = OCFS2_XATTR_BUCKET_SIZE;
4721        name_value_len = 0;
4722        for (i = 0; i < start; i++) {
4723                xe = &xh->xh_entries[i];
4724                name_value_len += namevalue_size_xe(xe);
4725                if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4726                        name_offset = le16_to_cpu(xe->xe_name_offset);
4727        }
4728
4729        /*
4730         * Now begin the modification to the new bucket.
4731         *
4732         * In the new bucket, We just move the xattr entry to the beginning
4733         * and don't touch the name/value. So there will be some holes in the
4734         * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4735         * called.
4736         */
4737        xe = &xh->xh_entries[start];
4738        len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4739        trace_ocfs2_divide_xattr_bucket_move(len,
4740                        (int)((char *)xe - (char *)xh),
4741                        (int)((char *)xh->xh_entries - (char *)xh));
4742        memmove((char *)xh->xh_entries, (char *)xe, len);
4743        xe = &xh->xh_entries[count - start];
4744        len = sizeof(struct ocfs2_xattr_entry) * start;
4745        memset((char *)xe, 0, len);
4746
4747        le16_add_cpu(&xh->xh_count, -start);
4748        le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4749
4750        /* Calculate xh_free_start for the new bucket. */
4751        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4752        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4753                xe = &xh->xh_entries[i];
4754                if (le16_to_cpu(xe->xe_name_offset) <
4755                    le16_to_cpu(xh->xh_free_start))
4756                        xh->xh_free_start = xe->xe_name_offset;
4757        }
4758
4759set_num_buckets:
4760        /* set xh->xh_num_buckets for the new xh. */
4761        if (new_bucket_head)
4762                xh->xh_num_buckets = cpu_to_le16(1);
4763        else
4764                xh->xh_num_buckets = 0;
4765
4766        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4767
4768        /* store the first_hash of the new bucket. */
4769        if (first_hash)
4770                *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4771
4772        /*
4773         * Now only update the 1st block of the old bucket.  If we
4774         * just added a new empty bucket, there is no need to modify
4775         * it.
4776         */
4777        if (start == count)
4778                goto out;
4779
4780        xh = bucket_xh(s_bucket);
4781        memset(&xh->xh_entries[start], 0,
4782               sizeof(struct ocfs2_xattr_entry) * (count - start));
4783        xh->xh_count = cpu_to_le16(start);
4784        xh->xh_free_start = cpu_to_le16(name_offset);
4785        xh->xh_name_value_len = cpu_to_le16(name_value_len);
4786
4787        ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4788
4789out:
4790        ocfs2_xattr_bucket_free(s_bucket);
4791        ocfs2_xattr_bucket_free(t_bucket);
4792
4793        return ret;
4794}
4795
4796/*
4797 * Copy xattr from one bucket to another bucket.
4798 *
4799 * The caller must make sure that the journal transaction
4800 * has enough space for journaling.
4801 */
4802static int ocfs2_cp_xattr_bucket(struct inode *inode,
4803                                 handle_t *handle,
4804                                 u64 s_blkno,
4805                                 u64 t_blkno,
4806                                 int t_is_new)
4807{
4808        int ret;
4809        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4810
4811        BUG_ON(s_blkno == t_blkno);
4812
4813        trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
4814                                    (unsigned long long)t_blkno,
4815                                    t_is_new);
4816
4817        s_bucket = ocfs2_xattr_bucket_new(inode);
4818        t_bucket = ocfs2_xattr_bucket_new(inode);
4819        if (!s_bucket || !t_bucket) {
4820                ret = -ENOMEM;
4821                mlog_errno(ret);
4822                goto out;
4823        }
4824
4825        ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4826        if (ret)
4827                goto out;
4828
4829        /*
4830         * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4831         * there's no need to read it.
4832         */
4833        ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
4834        if (ret)
4835                goto out;
4836
4837        /*
4838         * Hey, if we're overwriting t_bucket, what difference does
4839         * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4840         * cluster to fill, we came here from
4841         * ocfs2_mv_xattr_buckets(), and it is really new -
4842         * ACCESS_CREATE is required.  But we also might have moved data
4843         * out of t_bucket before extending back into it.
4844         * ocfs2_add_new_xattr_bucket() can do this - its call to
4845         * ocfs2_add_new_xattr_cluster() may have created a new extent
4846         * and copied out the end of the old extent.  Then it re-extends
4847         * the old extent back to create space for new xattrs.  That's
4848         * how we get here, and the bucket isn't really new.
4849         */
4850        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4851                                                t_is_new ?
4852                                                OCFS2_JOURNAL_ACCESS_CREATE :
4853                                                OCFS2_JOURNAL_ACCESS_WRITE);
4854        if (ret)
4855                goto out;
4856
4857        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4858        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4859
4860out:
4861        ocfs2_xattr_bucket_free(t_bucket);
4862        ocfs2_xattr_bucket_free(s_bucket);
4863
4864        return ret;
4865}
4866
4867/*
4868 * src_blk points to the start of an existing extent.  last_blk points to
4869 * last cluster in that extent.  to_blk points to a newly allocated
4870 * extent.  We copy the buckets from the cluster at last_blk to the new
4871 * extent.  If start_bucket is non-zero, we skip that many buckets before
4872 * we start copying.  The new extent's xh_num_buckets gets set to the
4873 * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4874 * by the same amount.
4875 */
4876static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4877                                  u64 src_blk, u64 last_blk, u64 to_blk,
4878                                  unsigned int start_bucket,
4879                                  u32 *first_hash)
4880{
4881        int i, ret, credits;
4882        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4883        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4884        int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4885        struct ocfs2_xattr_bucket *old_first, *new_first;
4886
4887        trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
4888                                     (unsigned long long)to_blk);
4889
4890        BUG_ON(start_bucket >= num_buckets);
4891        if (start_bucket) {
4892                num_buckets -= start_bucket;
4893                last_blk += (start_bucket * blks_per_bucket);
4894        }
4895
4896        /* The first bucket of the original extent */
4897        old_first = ocfs2_xattr_bucket_new(inode);
4898        /* The first bucket of the new extent */
4899        new_first = ocfs2_xattr_bucket_new(inode);
4900        if (!old_first || !new_first) {
4901                ret = -ENOMEM;
4902                mlog_errno(ret);
4903                goto out;
4904        }
4905
4906        ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4907        if (ret) {
4908                mlog_errno(ret);
4909                goto out;
4910        }
4911
4912        /*
4913         * We need to update the first bucket of the old extent and all
4914         * the buckets going to the new extent.
4915         */
4916        credits = ((num_buckets + 1) * blks_per_bucket);
4917        ret = ocfs2_extend_trans(handle, credits);
4918        if (ret) {
4919                mlog_errno(ret);
4920                goto out;
4921        }
4922
4923        ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4924                                                OCFS2_JOURNAL_ACCESS_WRITE);
4925        if (ret) {
4926                mlog_errno(ret);
4927                goto out;
4928        }
4929
4930        for (i = 0; i < num_buckets; i++) {
4931                ret = ocfs2_cp_xattr_bucket(inode, handle,
4932                                            last_blk + (i * blks_per_bucket),
4933                                            to_blk + (i * blks_per_bucket),
4934                                            1);
4935                if (ret) {
4936                        mlog_errno(ret);
4937                        goto out;
4938                }
4939        }
4940
4941        /*
4942         * Get the new bucket ready before we dirty anything
4943         * (This actually shouldn't fail, because we already dirtied
4944         * it once in ocfs2_cp_xattr_bucket()).
4945         */
4946        ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4947        if (ret) {
4948                mlog_errno(ret);
4949                goto out;
4950        }
4951        ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4952                                                OCFS2_JOURNAL_ACCESS_WRITE);
4953        if (ret) {
4954                mlog_errno(ret);
4955                goto out;
4956        }
4957
4958        /* Now update the headers */
4959        le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4960        ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4961
4962        bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4963        ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4964
4965        if (first_hash)
4966                *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4967
4968out:
4969        ocfs2_xattr_bucket_free(new_first);
4970        ocfs2_xattr_bucket_free(old_first);
4971        return ret;
4972}
4973
4974/*
4975 * Move some xattrs in this cluster to the new cluster.
4976 * This function should only be called when bucket size == cluster size.
4977 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4978 */
4979static int ocfs2_divide_xattr_cluster(struct inode *inode,
4980                                      handle_t *handle,
4981                                      u64 prev_blk,
4982                                      u64 new_blk,
4983                                      u32 *first_hash)
4984{
4985        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4986        int ret, credits = 2 * blk_per_bucket;
4987
4988        BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4989
4990        ret = ocfs2_extend_trans(handle, credits);
4991        if (ret) {
4992                mlog_errno(ret);
4993                return ret;
4994        }
4995
4996        /* Move half of the xattr in start_blk to the next bucket. */
4997        return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4998                                          new_blk, first_hash, 1);
4999}
5000
5001/*
5002 * Move some xattrs from the old cluster to the new one since they are not
5003 * contiguous in ocfs2 xattr tree.
5004 *
5005 * new_blk starts a new separate cluster, and we will move some xattrs from
5006 * prev_blk to it. v_start will be set as the first name hash value in this
5007 * new cluster so that it can be used as e_cpos during tree insertion and
5008 * don't collide with our original b-tree operations. first_bh and header_bh
5009 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
5010 * to extend the insert bucket.
5011 *
5012 * The problem is how much xattr should we move to the new one and when should
5013 * we update first_bh and header_bh?
5014 * 1. If cluster size > bucket size, that means the previous cluster has more
5015 *    than 1 bucket, so just move half nums of bucket into the new cluster and
5016 *    update the first_bh and header_bh if the insert bucket has been moved
5017 *    to the new cluster.
5018 * 2. If cluster_size == bucket_size:
5019 *    a) If the previous extent rec has more than one cluster and the insert
5020 *       place isn't in the last cluster, copy the entire last cluster to the
5021 *       new one. This time, we don't need to upate the first_bh and header_bh
5022 *       since they will not be moved into the new cluster.
5023 *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
5024 *       the new one. And we set the extend flag to zero if the insert place is
5025 *       moved into the new allocated cluster since no extend is needed.
5026 */
5027static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5028                                            handle_t *handle,
5029                                            struct ocfs2_xattr_bucket *first,
5030                                            struct ocfs2_xattr_bucket *target,
5031                                            u64 new_blk,
5032                                            u32 prev_clusters,
5033                                            u32 *v_start,
5034                                            int *extend)
5035{
5036        int ret;
5037
5038        trace_ocfs2_adjust_xattr_cross_cluster(
5039                        (unsigned long long)bucket_blkno(first),
5040                        (unsigned long long)new_blk, prev_clusters);
5041
5042        if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5043                ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5044                                                          handle,
5045                                                          first, target,
5046                                                          new_blk,
5047                                                          prev_clusters,
5048                                                          v_start);
5049                if (ret)
5050                        mlog_errno(ret);
5051        } else {
5052                /* The start of the last cluster in the first extent */
5053                u64 last_blk = bucket_blkno(first) +
5054                        ((prev_clusters - 1) *
5055                         ocfs2_clusters_to_blocks(inode->i_sb, 1));
5056
5057                if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5058                        ret = ocfs2_mv_xattr_buckets(inode, handle,
5059                                                     bucket_blkno(first),
5060                                                     last_blk, new_blk, 0,
5061                                                     v_start);
5062                        if (ret)
5063                                mlog_errno(ret);
5064                } else {
5065                        ret = ocfs2_divide_xattr_cluster(inode, handle,
5066                                                         last_blk, new_blk,
5067                                                         v_start);
5068                        if (ret)
5069                                mlog_errno(ret);
5070
5071                        if ((bucket_blkno(target) == last_blk) && extend)
5072                                *extend = 0;
5073                }
5074        }
5075
5076        return ret;
5077}
5078
5079/*
5080 * Add a new cluster for xattr storage.
5081 *
5082 * If the new cluster is contiguous with the previous one, it will be
5083 * appended to the same extent record, and num_clusters will be updated.
5084 * If not, we will insert a new extent for it and move some xattrs in
5085 * the last cluster into the new allocated one.
5086 * We also need to limit the maximum size of a btree leaf, otherwise we'll
5087 * lose the benefits of hashing because we'll have to search large leaves.
5088 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5089 * if it's bigger).
5090 *
5091 * first_bh is the first block of the previous extent rec and header_bh
5092 * indicates the bucket we will insert the new xattrs. They will be updated
5093 * when the header_bh is moved into the new cluster.
5094 */
5095static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5096                                       struct buffer_head *root_bh,
5097                                       struct ocfs2_xattr_bucket *first,
5098                                       struct ocfs2_xattr_bucket *target,
5099                                       u32 *num_clusters,
5100                                       u32 prev_cpos,
5101                                       int *extend,
5102                                       struct ocfs2_xattr_set_ctxt *ctxt)
5103{
5104        int ret;
5105        u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5106        u32 prev_clusters = *num_clusters;
5107        u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5108        u64 block;
5109        handle_t *handle = ctxt->handle;
5110        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5111        struct ocfs2_extent_tree et;
5112
5113        trace_ocfs2_add_new_xattr_cluster_begin(
5114                (unsigned long long)OCFS2_I(inode)->ip_blkno,
5115                (unsigned long long)bucket_blkno(first),
5116                prev_cpos, prev_clusters);
5117
5118        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5119
5120        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5121                                      OCFS2_JOURNAL_ACCESS_WRITE);
5122        if (ret < 0) {
5123                mlog_errno(ret);
5124                goto leave;
5125        }
5126
5127        ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5128                                     clusters_to_add, &bit_off, &num_bits);
5129        if (ret < 0) {
5130                if (ret != -ENOSPC)
5131                        mlog_errno(ret);
5132                goto leave;
5133        }
5134
5135        BUG_ON(num_bits > clusters_to_add);
5136
5137        block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5138        trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);
5139
5140        if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5141            (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5142             OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5143                /*
5144                 * If this cluster is contiguous with the old one and
5145                 * adding this new cluster, we don't surpass the limit of
5146                 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5147                 * initialized and used like other buckets in the previous
5148                 * cluster.
5149                 * So add it as a contiguous one. The caller will handle
5150                 * its init process.
5151                 */
5152                v_start = prev_cpos + prev_clusters;
5153                *num_clusters = prev_clusters + num_bits;
5154        } else {
5155                ret = ocfs2_adjust_xattr_cross_cluster(inode,
5156                                                       handle,
5157                                                       first,
5158                                                       target,
5159                                                       block,
5160                                                       prev_clusters,
5161                                                       &v_start,
5162                                                       extend);
5163                if (ret) {
5164                        mlog_errno(ret);
5165                        goto leave;
5166                }
5167        }
5168
5169        trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
5170                                                 v_start, num_bits);
5171        ret = ocfs2_insert_extent(handle, &et, v_start, block,
5172                                  num_bits, 0, ctxt->meta_ac);
5173        if (ret < 0) {
5174                mlog_errno(ret);
5175                goto leave;
5176        }
5177
5178        ocfs2_journal_dirty(handle, root_bh);
5179
5180leave:
5181        return ret;
5182}
5183
5184/*
5185 * We are given an extent.  'first' is the bucket at the very front of
5186 * the extent.  The extent has space for an additional bucket past
5187 * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5188 * of the target bucket.  We wish to shift every bucket past the target
5189 * down one, filling in that additional space.  When we get back to the
5190 * target, we split the target between itself and the now-empty bucket
5191 * at target+1 (aka, target_blkno + blks_per_bucket).
5192 */
5193static int ocfs2_extend_xattr_bucket(struct inode *inode,
5194                                     handle_t *handle,
5195                                     struct ocfs2_xattr_bucket *first,
5196                                     u64 target_blk,
5197                                     u32 num_clusters)
5198{
5199        int ret, credits;
5200        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5201        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5202        u64 end_blk;
5203        u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5204
5205        trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
5206                                        (unsigned long long)bucket_blkno(first),
5207                                        num_clusters, new_bucket);
5208
5209        /* The extent must have room for an additional bucket */
5210        BUG_ON(new_bucket >=
5211               (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5212
5213        /* end_blk points to the last existing bucket */
5214        end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5215
5216        /*
5217         * end_blk is the start of the last existing bucket.
5218         * Thus, (end_blk - target_blk) covers the target bucket and
5219         * every bucket after it up to, but not including, the last
5220         * existing bucket.  Then we add the last existing bucket, the
5221         * new bucket, and the first bucket (3 * blk_per_bucket).
5222         */
5223        credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5224        ret = ocfs2_extend_trans(handle, credits);
5225        if (ret) {
5226                mlog_errno(ret);
5227                goto out;
5228        }
5229
5230        ret = ocfs2_xattr_bucket_journal_access(handle, first,
5231                                                OCFS2_JOURNAL_ACCESS_WRITE);
5232        if (ret) {
5233                mlog_errno(ret);
5234                goto out;
5235        }
5236
5237        while (end_blk != target_blk) {
5238                ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5239                                            end_blk + blk_per_bucket, 0);
5240                if (ret)
5241                        goto out;
5242                end_blk -= blk_per_bucket;
5243        }
5244
5245        /* Move half of the xattr in target_blkno to the next bucket. */
5246        ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5247                                        target_blk + blk_per_bucket, NULL, 0);
5248
5249        le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5250        ocfs2_xattr_bucket_journal_dirty(handle, first);
5251
5252out:
5253        return ret;
5254}
5255
5256/*
5257 * Add new xattr bucket in an extent record and adjust the buckets
5258 * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5259 * bucket we want to insert into.
5260 *
5261 * In the easy case, we will move all the buckets after target down by
5262 * one. Half of target's xattrs will be moved to the next bucket.
5263 *
5264 * If current cluster is full, we'll allocate a new one.  This may not
5265 * be contiguous.  The underlying calls will make sure that there is
5266 * space for the insert, shifting buckets around if necessary.
5267 * 'target' may be moved by those calls.
5268 */
5269static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5270                                      struct buffer_head *xb_bh,
5271                                      struct ocfs2_xattr_bucket *target,
5272                                      struct ocfs2_xattr_set_ctxt *ctxt)
5273{
5274        struct ocfs2_xattr_block *xb =
5275                        (struct ocfs2_xattr_block *)xb_bh->b_data;
5276        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5277        struct ocfs2_extent_list *el = &xb_root->xt_list;
5278        u32 name_hash =
5279                le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5280        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5281        int ret, num_buckets, extend = 1;
5282        u64 p_blkno;
5283        u32 e_cpos, num_clusters;
5284        /* The bucket at the front of the extent */
5285        struct ocfs2_xattr_bucket *first;
5286
5287        trace_ocfs2_add_new_xattr_bucket(
5288                                (unsigned long long)bucket_blkno(target));
5289
5290        /* The first bucket of the original extent */
5291        first = ocfs2_xattr_bucket_new(inode);
5292        if (!first) {
5293                ret = -ENOMEM;
5294                mlog_errno(ret);
5295                goto out;
5296        }
5297
5298        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5299                                  &num_clusters, el);
5300        if (ret) {
5301                mlog_errno(ret);
5302                goto out;
5303        }
5304
5305        ret = ocfs2_read_xattr_bucket(first, p_blkno);
5306        if (ret) {
5307                mlog_errno(ret);
5308                goto out;
5309        }
5310
5311        num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5312        if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5313                /*
5314                 * This can move first+target if the target bucket moves
5315                 * to the new extent.
5316                 */
5317                ret = ocfs2_add_new_xattr_cluster(inode,
5318                                                  xb_bh,
5319                                                  first,
5320                                                  target,
5321                                                  &num_clusters,
5322                                                  e_cpos,
5323                                                  &extend,
5324                                                  ctxt);
5325                if (ret) {
5326                        mlog_errno(ret);
5327                        goto out;
5328                }
5329        }
5330
5331        if (extend) {
5332                ret = ocfs2_extend_xattr_bucket(inode,
5333                                                ctxt->handle,
5334                                                first,
5335                                                bucket_blkno(target),
5336                                                num_clusters);
5337                if (ret)
5338                        mlog_errno(ret);
5339        }
5340
5341out:
5342        ocfs2_xattr_bucket_free(first);
5343
5344        return ret;
5345}
5346
5347/*
5348 * Truncate the specified xe_off entry in xattr bucket.
5349 * bucket is indicated by header_bh and len is the new length.
5350 * Both the ocfs2_xattr_value_root and the entry will be updated here.
5351 *
5352 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5353 */
5354static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5355                                             struct ocfs2_xattr_bucket *bucket,
5356                                             int xe_off,
5357                                             int len,
5358                                             struct ocfs2_xattr_set_ctxt *ctxt)
5359{
5360        int ret, offset;
5361        u64 value_blk;
5362        struct ocfs2_xattr_entry *xe;
5363        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5364        size_t blocksize = inode->i_sb->s_blocksize;
5365        struct ocfs2_xattr_value_buf vb = {
5366                .vb_access = ocfs2_journal_access,
5367        };
5368
5369        xe = &xh->xh_entries[xe_off];
5370
5371        BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5372
5373        offset = le16_to_cpu(xe->xe_name_offset) +
5374                 OCFS2_XATTR_SIZE(xe->xe_name_len);
5375
5376        value_blk = offset / blocksize;
5377
5378        /* We don't allow ocfs2_xattr_value to be stored in different block. */
5379        BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5380
5381        vb.vb_bh = bucket->bu_bhs[value_blk];
5382        BUG_ON(!vb.vb_bh);
5383
5384        vb.vb_xv = (struct ocfs2_xattr_value_root *)
5385                (vb.vb_bh->b_data + offset % blocksize);
5386
5387        /*
5388         * From here on out we have to dirty the bucket.  The generic
5389         * value calls only modify one of the bucket's bhs, but we need
5390         * to send the bucket at once.  So if they error, they *could* have
5391         * modified something.  We have to assume they did, and dirty
5392         * the whole bucket.  This leaves us in a consistent state.
5393         */
5394        trace_ocfs2_xattr_bucket_value_truncate(
5395                        (unsigned long long)bucket_blkno(bucket), xe_off, len);
5396        ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5397        if (ret) {
5398                mlog_errno(ret);
5399                goto out;
5400        }
5401
5402        ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5403                                                OCFS2_JOURNAL_ACCESS_WRITE);
5404        if (ret) {
5405                mlog_errno(ret);
5406                goto out;
5407        }
5408
5409        xe->xe_value_size = cpu_to_le64(len);
5410
5411        ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5412
5413out:
5414        return ret;
5415}
5416
5417static int ocfs2_rm_xattr_cluster(struct inode *inode,
5418                                  struct buffer_head *root_bh,
5419                                  u64 blkno,
5420                                  u32 cpos,
5421                                  u32 len,
5422                                  void *para)
5423{
5424        int ret;
5425        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5426        struct inode *tl_inode = osb->osb_tl_inode;
5427        handle_t *handle;
5428        struct ocfs2_xattr_block *xb =
5429                        (struct ocfs2_xattr_block *)root_bh->b_data;
5430        struct ocfs2_alloc_context *meta_ac = NULL;
5431        struct ocfs2_cached_dealloc_ctxt dealloc;
5432        struct ocfs2_extent_tree et;
5433
5434        ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5435                                          ocfs2_delete_xattr_in_bucket, para);
5436        if (ret) {
5437                mlog_errno(ret);
5438                return ret;
5439        }
5440
5441        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5442
5443        ocfs2_init_dealloc_ctxt(&dealloc);
5444
5445        trace_ocfs2_rm_xattr_cluster(
5446                        (unsigned long long)OCFS2_I(inode)->ip_blkno,
5447                        (unsigned long long)blkno, cpos, len);
5448
5449        ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5450                                               len);
5451
5452        ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5453        if (ret) {
5454                mlog_errno(ret);
5455                return ret;
5456        }
5457
5458        inode_lock(tl_inode);
5459
5460        if (ocfs2_truncate_log_needs_flush(osb)) {
5461                ret = __ocfs2_flush_truncate_log(osb);
5462                if (ret < 0) {
5463                        mlog_errno(ret);
5464                        goto out;
5465                }
5466        }
5467
5468        handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5469        if (IS_ERR(handle)) {
5470                ret = -ENOMEM;
5471                mlog_errno(ret);
5472                goto out;
5473        }
5474
5475        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5476                                      OCFS2_JOURNAL_ACCESS_WRITE);
5477        if (ret) {
5478                mlog_errno(ret);
5479                goto out_commit;
5480        }
5481
5482        ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5483                                  &dealloc);
5484        if (ret) {
5485                mlog_errno(ret);
5486                goto out_commit;
5487        }
5488
5489        le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5490        ocfs2_journal_dirty(handle, root_bh);
5491
5492        ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5493        if (ret)
5494                mlog_errno(ret);
5495        ocfs2_update_inode_fsync_trans(handle, inode, 0);
5496
5497out_commit:
5498        ocfs2_commit_trans(osb, handle);
5499out:
5500        ocfs2_schedule_truncate_log_flush(osb, 1);
5501
5502        inode_unlock(tl_inode);
5503
5504        if (meta_ac)
5505                ocfs2_free_alloc_context(meta_ac);
5506
5507        ocfs2_run_deallocs(osb, &dealloc);
5508
5509        return ret;
5510}
5511
5512/*
5513 * check whether the xattr bucket is filled up with the same hash value.
5514 * If we want to insert the xattr with the same hash, return -ENOSPC.
5515 * If we want to insert a xattr with different hash value, go ahead
5516 * and ocfs2_divide_xattr_bucket will handle this.
5517 */
5518static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5519                                              struct ocfs2_xattr_bucket *bucket,
5520                                              const char *name)
5521{
5522        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5523        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5524
5525        if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5526                return 0;
5527
5528        if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5529            xh->xh_entries[0].xe_name_hash) {
5530                mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5531                     "hash = %u\n",
5532                     (unsigned long long)bucket_blkno(bucket),
5533                     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5534                return -ENOSPC;
5535        }
5536
5537        return 0;
5538}
5539
5540/*
5541 * Try to set the entry in the current bucket.  If we fail, the caller
5542 * will handle getting us another bucket.
5543 */
5544static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5545                                        struct ocfs2_xattr_info *xi,
5546                                        struct ocfs2_xattr_search *xs,
5547                                        struct ocfs2_xattr_set_ctxt *ctxt)
5548{
5549        int ret;
5550        struct ocfs2_xa_loc loc;
5551
5552        trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5553
5554        ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5555                                       xs->not_found ? NULL : xs->here);
5556        ret = ocfs2_xa_set(&loc, xi, ctxt);
5557        if (!ret) {
5558                xs->here = loc.xl_entry;
5559                goto out;
5560        }
5561        if (ret != -ENOSPC) {
5562                mlog_errno(ret);
5563                goto out;
5564        }
5565
5566        /* Ok, we need space.  Let's try defragmenting the bucket. */
5567        ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5568                                        xs->bucket);
5569        if (ret) {
5570                mlog_errno(ret);
5571                goto out;
5572        }
5573
5574        ret = ocfs2_xa_set(&loc, xi, ctxt);
5575        if (!ret) {
5576                xs->here = loc.xl_entry;
5577                goto out;
5578        }
5579        if (ret != -ENOSPC)
5580                mlog_errno(ret);
5581
5582
5583out:
5584        return ret;
5585}
5586
5587static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5588                                             struct ocfs2_xattr_info *xi,
5589                                             struct ocfs2_xattr_search *xs,
5590                                             struct ocfs2_xattr_set_ctxt *ctxt)
5591{
5592        int ret;
5593
5594        trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
5595
5596        ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5597        if (!ret)
5598                goto out;
5599        if (ret != -ENOSPC) {
5600                mlog_errno(ret);
5601                goto out;
5602        }
5603
5604        /* Ack, need more space.  Let's try to get another bucket! */
5605
5606        /*
5607         * We do not allow for overlapping ranges between buckets. And
5608         * the maximum number of collisions we will allow for then is
5609         * one bucket's worth, so check it here whether we need to
5610         * add a new bucket for the insert.
5611         */
5612        ret = ocfs2_check_xattr_bucket_collision(inode,
5613                                                 xs->bucket,
5614                                                 xi->xi_name);
5615        if (ret) {
5616                mlog_errno(ret);
5617                goto out;
5618        }
5619
5620        ret = ocfs2_add_new_xattr_bucket(inode,
5621                                         xs->xattr_bh,
5622                                         xs->bucket,
5623                                         ctxt);
5624        if (ret) {
5625                mlog_errno(ret);
5626                goto out;
5627        }
5628
5629        /*
5630         * ocfs2_add_new_xattr_bucket() will have updated
5631         * xs->bucket if it moved, but it will not have updated
5632         * any of the other search fields.  Thus, we drop it and
5633         * re-search.  Everything should be cached, so it'll be
5634         * quick.
5635         */
5636        ocfs2_xattr_bucket_relse(xs->bucket);
5637        ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5638                                           xi->xi_name_index,
5639                                           xi->xi_name, xs);
5640        if (ret && ret != -ENODATA)
5641                goto out;
5642        xs->not_found = ret;
5643
5644        /* Ok, we have a new bucket, let's try again */
5645        ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5646        if (ret && (ret != -ENOSPC))
5647                mlog_errno(ret);
5648
5649out:
5650        return ret;
5651}
5652
5653static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5654                                        struct ocfs2_xattr_bucket *bucket,
5655                                        void *para)
5656{
5657        int ret = 0, ref_credits;
5658        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5659        u16 i;
5660        struct ocfs2_xattr_entry *xe;
5661        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5662        struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5663        int credits = ocfs2_remove_extent_credits(osb->sb) +
5664                ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5665        struct ocfs2_xattr_value_root *xv;
5666        struct ocfs2_rm_xattr_bucket_para *args =
5667                        (struct ocfs2_rm_xattr_bucket_para *)para;
5668
5669        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5670
5671        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5672                xe = &xh->xh_entries[i];
5673                if (ocfs2_xattr_is_local(xe))
5674                        continue;
5675
5676                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5677                                                      i, &xv, NULL);
5678                if (ret) {
5679                        mlog_errno(ret);
5680                        break;
5681                }
5682
5683                ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5684                                                         args->ref_ci,
5685                                                         args->ref_root_bh,
5686                                                         &ctxt.meta_ac,
5687                                                         &ref_credits);
5688
5689                ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5690                if (IS_ERR(ctxt.handle)) {
5691                        ret = PTR_ERR(ctxt.handle);
5692                        mlog_errno(ret);
5693                        break;
5694                }
5695
5696                ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5697                                                        i, 0, &ctxt);
5698
5699                ocfs2_commit_trans(osb, ctxt.handle);
5700                if (ctxt.meta_ac) {
5701                        ocfs2_free_alloc_context(ctxt.meta_ac);
5702                        ctxt.meta_ac = NULL;
5703                }
5704                if (ret) {
5705                        mlog_errno(ret);
5706                        break;
5707                }
5708        }
5709
5710        if (ctxt.meta_ac)
5711                ocfs2_free_alloc_context(ctxt.meta_ac);
5712        ocfs2_schedule_truncate_log_flush(osb, 1);
5713        ocfs2_run_deallocs(osb, &ctxt.dealloc);
5714        return ret;
5715}
5716
5717/*
5718 * Whenever we modify a xattr value root in the bucket(e.g, CoW
5719 * or change the extent record flag), we need to recalculate
5720 * the metaecc for the whole bucket. So it is done here.
5721 *
5722 * Note:
5723 * We have to give the extra credits for the caller.
5724 */
5725static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5726                                            handle_t *handle,
5727                                            void *para)
5728{
5729        int ret;
5730        struct ocfs2_xattr_bucket *bucket =
5731                        (struct ocfs2_xattr_bucket *)para;
5732
5733        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5734                                                OCFS2_JOURNAL_ACCESS_WRITE);
5735        if (ret) {
5736                mlog_errno(ret);
5737                return ret;
5738        }
5739
5740        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5741
5742        return 0;
5743}
5744
5745/*
5746 * Special action we need if the xattr value is refcounted.
5747 *
5748 * 1. If the xattr is refcounted, lock the tree.
5749 * 2. CoW the xattr if we are setting the new value and the value
5750 *    will be stored outside.
5751 * 3. In other case, decrease_refcount will work for us, so just
5752 *    lock the refcount tree, calculate the meta and credits is OK.
5753 *
5754 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5755 * currently CoW is a completed transaction, while this function
5756 * will also lock the allocators and let us deadlock. So we will
5757 * CoW the whole xattr value.
5758 */
5759static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5760                                        struct ocfs2_dinode *di,
5761                                        struct ocfs2_xattr_info *xi,
5762                                        struct ocfs2_xattr_search *xis,
5763                                        struct ocfs2_xattr_search *xbs,
5764                                        struct ocfs2_refcount_tree **ref_tree,
5765                                        int *meta_add,
5766                                        int *credits)
5767{
5768        int ret = 0;
5769        struct ocfs2_xattr_block *xb;
5770        struct ocfs2_xattr_entry *xe;
5771        char *base;
5772        u32 p_cluster, num_clusters;
5773        unsigned int ext_flags;
5774        int name_offset, name_len;
5775        struct ocfs2_xattr_value_buf vb;
5776        struct ocfs2_xattr_bucket *bucket = NULL;
5777        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5778        struct ocfs2_post_refcount refcount;
5779        struct ocfs2_post_refcount *p = NULL;
5780        struct buffer_head *ref_root_bh = NULL;
5781
5782        if (!xis->not_found) {
5783                xe = xis->here;
5784                name_offset = le16_to_cpu(xe->xe_name_offset);
5785                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5786                base = xis->base;
5787                vb.vb_bh = xis->inode_bh;
5788                vb.vb_access = ocfs2_journal_access_di;
5789        } else {
5790                int i, block_off = 0;
5791                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5792                xe = xbs->here;
5793                name_offset = le16_to_cpu(xe->xe_name_offset);
5794                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5795                i = xbs->here - xbs->header->xh_entries;
5796
5797                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5798                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5799                                                        bucket_xh(xbs->bucket),
5800                                                        i, &block_off,
5801                                                        &name_offset);
5802                        if (ret) {
5803                                mlog_errno(ret);
5804                                goto out;
5805                        }
5806                        base = bucket_block(xbs->bucket, block_off);
5807                        vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5808                        vb.vb_access = ocfs2_journal_access;
5809
5810                        if (ocfs2_meta_ecc(osb)) {
5811                                /*create parameters for ocfs2_post_refcount. */
5812                                bucket = xbs->bucket;
5813                                refcount.credits = bucket->bu_blocks;
5814                                refcount.para = bucket;
5815                                refcount.func =
5816                                        ocfs2_xattr_bucket_post_refcount;
5817                                p = &refcount;
5818                        }
5819                } else {
5820                        base = xbs->base;
5821                        vb.vb_bh = xbs->xattr_bh;
5822                        vb.vb_access = ocfs2_journal_access_xb;
5823                }
5824        }
5825
5826        if (ocfs2_xattr_is_local(xe))
5827                goto out;
5828
5829        vb.vb_xv = (struct ocfs2_xattr_value_root *)
5830                                (base + name_offset + name_len);
5831
5832        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5833                                       &num_clusters, &vb.vb_xv->xr_list,
5834                                       &ext_flags);
5835        if (ret) {
5836                mlog_errno(ret);
5837                goto out;
5838        }
5839
5840        /*
5841         * We just need to check the 1st extent record, since we always
5842         * CoW the whole xattr. So there shouldn't be a xattr with
5843         * some REFCOUNT extent recs after the 1st one.
5844         */
5845        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5846                goto out;
5847
5848        ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5849                                       1, ref_tree, &ref_root_bh);
5850        if (ret) {
5851                mlog_errno(ret);
5852                goto out;
5853        }
5854
5855        /*
5856         * If we are deleting the xattr or the new size will be stored inside,
5857         * cool, leave it there, the xattr truncate process will remove them
5858         * for us(it still needs the refcount tree lock and the meta, credits).
5859         * And the worse case is that every cluster truncate will split the
5860         * refcount tree, and make the original extent become 3. So we will need
5861         * 2 * cluster more extent recs at most.
5862         */
5863        if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5864
5865                ret = ocfs2_refcounted_xattr_delete_need(inode,
5866                                                         &(*ref_tree)->rf_ci,
5867                                                         ref_root_bh, vb.vb_xv,
5868                                                         meta_add, credits);
5869                if (ret)
5870                        mlog_errno(ret);
5871                goto out;
5872        }
5873
5874        ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5875                                       *ref_tree, ref_root_bh, 0,
5876                                       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5877        if (ret)
5878                mlog_errno(ret);
5879
5880out:
5881        brelse(ref_root_bh);
5882        return ret;
5883}
5884
5885/*
5886 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5887 * The physical clusters will be added to refcount tree.
5888 */
5889static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5890                                struct ocfs2_xattr_value_root *xv,
5891                                struct ocfs2_extent_tree *value_et,
5892                                struct ocfs2_caching_info *ref_ci,
5893                                struct buffer_head *ref_root_bh,
5894                                struct ocfs2_cached_dealloc_ctxt *dealloc,
5895                                struct ocfs2_post_refcount *refcount)
5896{
5897        int ret = 0;
5898        u32 clusters = le32_to_cpu(xv->xr_clusters);
5899        u32 cpos, p_cluster, num_clusters;
5900        struct ocfs2_extent_list *el = &xv->xr_list;
5901        unsigned int ext_flags;
5902
5903        cpos = 0;
5904        while (cpos < clusters) {
5905                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5906                                               &num_clusters, el, &ext_flags);
5907                if (ret) {
5908                        mlog_errno(ret);
5909                        break;
5910                }
5911
5912                cpos += num_clusters;
5913                if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5914                        continue;
5915
5916                BUG_ON(!p_cluster);
5917
5918                ret = ocfs2_add_refcount_flag(inode, value_et,
5919                                              ref_ci, ref_root_bh,
5920                                              cpos - num_clusters,
5921                                              p_cluster, num_clusters,
5922                                              dealloc, refcount);
5923                if (ret) {
5924                        mlog_errno(ret);
5925                        break;
5926                }
5927        }
5928
5929        return ret;
5930}
5931
5932/*
5933 * Given a normal ocfs2_xattr_header, refcount all the entries which
5934 * have value stored outside.
5935 * Used for xattrs stored in inode and ocfs2_xattr_block.
5936 */
5937static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5938                                struct ocfs2_xattr_value_buf *vb,
5939                                struct ocfs2_xattr_header *header,
5940                                struct ocfs2_caching_info *ref_ci,
5941                                struct buffer_head *ref_root_bh,
5942                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5943{
5944
5945        struct ocfs2_xattr_entry *xe;
5946        struct ocfs2_xattr_value_root *xv;
5947        struct ocfs2_extent_tree et;
5948        int i, ret = 0;
5949
5950        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5951                xe = &header->xh_entries[i];
5952
5953                if (ocfs2_xattr_is_local(xe))
5954                        continue;
5955
5956                xv = (struct ocfs2_xattr_value_root *)((void *)header +
5957                        le16_to_cpu(xe->xe_name_offset) +
5958                        OCFS2_XATTR_SIZE(xe->xe_name_len));
5959
5960                vb->vb_xv = xv;
5961                ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5962
5963                ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5964                                                        ref_ci, ref_root_bh,
5965                                                        dealloc, NULL);
5966                if (ret) {
5967                        mlog_errno(ret);
5968                        break;
5969                }
5970        }
5971
5972        return ret;
5973}
5974
5975static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5976                                struct buffer_head *fe_bh,
5977                                struct ocfs2_caching_info *ref_ci,
5978                                struct buffer_head *ref_root_bh,
5979                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5980{
5981        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5982        struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5983                                (fe_bh->b_data + inode->i_sb->s_blocksize -
5984                                le16_to_cpu(di->i_xattr_inline_size));
5985        struct ocfs2_xattr_value_buf vb = {
5986                .vb_bh = fe_bh,
5987                .vb_access = ocfs2_journal_access_di,
5988        };
5989
5990        return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5991                                                  ref_ci, ref_root_bh, dealloc);
5992}
5993
/*
 * Arguments carried through the opaque "para" pointer while the buckets
 * of an indexed xattr block are iterated by
 * ocfs2_xattr_block_attach_refcount(); unpacked again in
 * ocfs2_xattr_bucket_value_refcount(), which forwards all three
 * members to ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt *dealloc;
};
5999
6000static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
6001                                           struct ocfs2_xattr_bucket *bucket,
6002                                           int offset,
6003                                           struct ocfs2_xattr_value_root **xv,
6004                                           struct buffer_head **bh)
6005{
6006        int ret, block_off, name_offset;
6007        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
6008        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6009        void *base;
6010
6011        ret = ocfs2_xattr_bucket_get_name_value(sb,
6012                                                bucket_xh(bucket),
6013                                                offset,
6014                                                &block_off,
6015                                                &name_offset);
6016        if (ret) {
6017                mlog_errno(ret);
6018                goto out;
6019        }
6020
6021        base = bucket_block(bucket, block_off);
6022
6023        *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6024                         OCFS2_XATTR_SIZE(xe->xe_name_len));
6025
6026        if (bh)
6027                *bh = bucket->bu_bhs[block_off];
6028out:
6029        return ret;
6030}
6031
6032/*
6033 * For a given xattr bucket, refcount all the entries which
6034 * have value stored outside.
6035 */
6036static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6037                                             struct ocfs2_xattr_bucket *bucket,
6038                                             void *para)
6039{
6040        int i, ret = 0;
6041        struct ocfs2_extent_tree et;
6042        struct ocfs2_xattr_tree_value_refcount_para *ref =
6043                        (struct ocfs2_xattr_tree_value_refcount_para *)para;
6044        struct ocfs2_xattr_header *xh =
6045                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6046        struct ocfs2_xattr_entry *xe;
6047        struct ocfs2_xattr_value_buf vb = {
6048                .vb_access = ocfs2_journal_access,
6049        };
6050        struct ocfs2_post_refcount refcount = {
6051                .credits = bucket->bu_blocks,
6052                .para = bucket,
6053                .func = ocfs2_xattr_bucket_post_refcount,
6054        };
6055        struct ocfs2_post_refcount *p = NULL;
6056
6057        /* We only need post_refcount if we support metaecc. */
6058        if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6059                p = &refcount;
6060
6061        trace_ocfs2_xattr_bucket_value_refcount(
6062                                (unsigned long long)bucket_blkno(bucket),
6063                                le16_to_cpu(xh->xh_count));
6064        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6065                xe = &xh->xh_entries[i];
6066
6067                if (ocfs2_xattr_is_local(xe))
6068                        continue;
6069
6070                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6071                                                      &vb.vb_xv, &vb.vb_bh);
6072                if (ret) {
6073                        mlog_errno(ret);
6074                        break;
6075                }
6076
6077                ocfs2_init_xattr_value_extent_tree(&et,
6078                                                   INODE_CACHE(inode), &vb);
6079
6080                ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6081                                                        &et, ref->ref_ci,
6082                                                        ref->ref_root_bh,
6083                                                        ref->dealloc, p);
6084                if (ret) {
6085                        mlog_errno(ret);
6086                        break;
6087                }
6088        }
6089
6090        return ret;
6091
6092}
6093
6094static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6095                                     struct buffer_head *root_bh,
6096                                     u64 blkno, u32 cpos, u32 len, void *para)
6097{
6098        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6099                                           ocfs2_xattr_bucket_value_refcount,
6100                                           para);
6101}
6102
6103static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6104                                struct buffer_head *blk_bh,
6105                                struct ocfs2_caching_info *ref_ci,
6106                                struct buffer_head *ref_root_bh,
6107                                struct ocfs2_cached_dealloc_ctxt *dealloc)
6108{
6109        int ret = 0;
6110        struct ocfs2_xattr_block *xb =
6111                                (struct ocfs2_xattr_block *)blk_bh->b_data;
6112
6113        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6114                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6115                struct ocfs2_xattr_value_buf vb = {
6116                        .vb_bh = blk_bh,
6117                        .vb_access = ocfs2_journal_access_xb,
6118                };
6119
6120                ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6121                                                         ref_ci, ref_root_bh,
6122                                                         dealloc);
6123        } else {
6124                struct ocfs2_xattr_tree_value_refcount_para para = {
6125                        .ref_ci = ref_ci,
6126                        .ref_root_bh = ref_root_bh,
6127                        .dealloc = dealloc,
6128                };
6129
6130                ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6131                                                ocfs2_refcount_xattr_tree_rec,
6132                                                &para);
6133        }
6134
6135        return ret;
6136}
6137
6138int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6139                                     struct buffer_head *fe_bh,
6140                                     struct ocfs2_caching_info *ref_ci,
6141                                     struct buffer_head *ref_root_bh,
6142                                     struct ocfs2_cached_dealloc_ctxt *dealloc)
6143{
6144        int ret = 0;
6145        struct ocfs2_inode_info *oi = OCFS2_I(inode);
6146        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6147        struct buffer_head *blk_bh = NULL;
6148
6149        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6150                ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6151                                                         ref_ci, ref_root_bh,
6152                                                         dealloc);
6153                if (ret) {
6154                        mlog_errno(ret);
6155                        goto out;
6156                }
6157        }
6158
6159        if (!di->i_xattr_loc)
6160                goto out;
6161
6162        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6163                                     &blk_bh);
6164        if (ret < 0) {
6165                mlog_errno(ret);
6166                goto out;
6167        }
6168
6169        ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6170                                                ref_root_bh, dealloc);
6171        if (ret)
6172                mlog_errno(ret);
6173
6174        brelse(blk_bh);
6175out:
6176
6177        return ret;
6178}
6179
/*
 * Filter callback: a zero return makes ocfs2_reflink_xattr_header() drop
 * the entry from the new header instead of reflinking it.
 */
typedef int should_xattr_reflinked(struct ocfs2_xattr_entry *xe);

/*
 * Everything the xattr reflink helpers need to carry around.
 *
 * old_inode/new_inode:  source and destination of the reflink.
 * old_bh/new_bh:        inode buffer heads of the old and the new inode.
 * ref_ci/ref_root_bh:   refcount tree handles, forwarded to
 *                       ocfs2_increase_refcount().
 * dealloc:              dealloc context, forwarded to
 *                       ocfs2_increase_refcount().
 * xattr_reflinked:      optional filter (may be NULL, meaning every entry
 *                       is reflinked).
 */
struct ocfs2_xattr_reflink {
        struct inode *old_inode, *new_inode;
        struct buffer_head *old_bh, *new_bh;
        struct ocfs2_caching_info *ref_ci;
        struct buffer_head *ref_root_bh;
        struct ocfs2_cached_dealloc_ctxt *dealloc;
        should_xattr_reflinked *xattr_reflinked;
};
6195
/*
 * Locate the value root of one xattr entry.
 *
 * Given an xattr header @xh and the index @offset of an entry in
 * xh->xh_entries, store the matching value root in *@xv and, when @ret_bh
 * is not NULL, the buffer head that holds it in *@ret_bh.  @bh and @para
 * are interpreted by the specific implementation.
 *
 * Xattrs kept in the inode, in an xattr block and in an xattr tree each
 * have their own implementation of this callback.
 *
 * Returns 0 on success or a negative error code.
 */
typedef int get_xattr_value_root(struct super_block *sb,
                                 struct buffer_head *bh,
                                 struct ocfs2_xattr_header *xh,
                                 int offset,
                                 struct ocfs2_xattr_value_root **xv,
                                 struct buffer_head **ret_bh,
                                 void *para);
6208
6209/*
6210 * Calculate all the xattr value root metadata stored in this xattr header and
6211 * credits we need if we create them from the scratch.
6212 * We use get_xattr_value_root so that all types of xattr container can use it.
6213 */
6214static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6215                                             struct buffer_head *bh,
6216                                             struct ocfs2_xattr_header *xh,
6217                                             int *metas, int *credits,
6218                                             int *num_recs,
6219                                             get_xattr_value_root *func,
6220                                             void *para)
6221{
6222        int i, ret = 0;
6223        struct ocfs2_xattr_value_root *xv;
6224        struct ocfs2_xattr_entry *xe;
6225
6226        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6227                xe = &xh->xh_entries[i];
6228                if (ocfs2_xattr_is_local(xe))
6229                        continue;
6230
6231                ret = func(sb, bh, xh, i, &xv, NULL, para);
6232                if (ret) {
6233                        mlog_errno(ret);
6234                        break;
6235                }
6236
6237                *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6238                          le16_to_cpu(xv->xr_list.l_next_free_rec);
6239
6240                *credits += ocfs2_calc_extend_credits(sb,
6241                                                &def_xv.xv.xr_list);
6242
6243                /*
6244                 * If the value is a tree with depth > 1, We don't go deep
6245                 * to the extent block, so just calculate a maximum record num.
6246                 */
6247                if (!xv->xr_list.l_tree_depth)
6248                        *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6249                else
6250                        *num_recs += ocfs2_clusters_for_bytes(sb,
6251                                                              XATTR_SIZE_MAX);
6252        }
6253
6254        return ret;
6255}
6256
6257/* Used by xattr inode and block to return the right xv and buffer_head. */
6258static int ocfs2_get_xattr_value_root(struct super_block *sb,
6259                                      struct buffer_head *bh,
6260                                      struct ocfs2_xattr_header *xh,
6261                                      int offset,
6262                                      struct ocfs2_xattr_value_root **xv,
6263                                      struct buffer_head **ret_bh,
6264                                      void *para)
6265{
6266        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6267
6268        *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6269                le16_to_cpu(xe->xe_name_offset) +
6270                OCFS2_XATTR_SIZE(xe->xe_name_len));
6271
6272        if (ret_bh)
6273                *ret_bh = bh;
6274
6275        return 0;
6276}
6277
6278/*
6279 * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6280 * It is only used for inline xattr and xattr block.
6281 */
6282static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6283                                        struct ocfs2_xattr_header *xh,
6284                                        struct buffer_head *ref_root_bh,
6285                                        int *credits,
6286                                        struct ocfs2_alloc_context **meta_ac)
6287{
6288        int ret, meta_add = 0, num_recs = 0;
6289        struct ocfs2_refcount_block *rb =
6290                        (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6291
6292        *credits = 0;
6293
6294        ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6295                                                &meta_add, credits, &num_recs,
6296                                                ocfs2_get_xattr_value_root,
6297                                                NULL);
6298        if (ret) {
6299                mlog_errno(ret);
6300                goto out;
6301        }
6302
6303        /*
6304         * We need to add/modify num_recs in refcount tree, so just calculate
6305         * an approximate number we need for refcount tree change.
6306         * Sometimes we need to split the tree, and after split,  half recs
6307         * will be moved to the new block, and a new block can only provide
6308         * half number of recs. So we multiple new blocks by 2.
6309         */
6310        num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6311        meta_add += num_recs;
6312        *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6313        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6314                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6315                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6316        else
6317                *credits += 1;
6318
6319        ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6320        if (ret)
6321                mlog_errno(ret);
6322
6323out:
6324        return ret;
6325}
6326
6327/*
6328 * Given a xattr header, reflink all the xattrs in this container.
6329 * It can be used for inode, block and bucket.
6330 *
6331 * NOTE:
6332 * Before we call this function, the caller has memcpy the xattr in
6333 * old_xh to the new_xh.
6334 *
6335 * If args.xattr_reflinked is set, call it to decide whether the xe should
6336 * be reflinked or not. If not, remove it from the new xattr header.
6337 */
6338static int ocfs2_reflink_xattr_header(handle_t *handle,
6339                                      struct ocfs2_xattr_reflink *args,
6340                                      struct buffer_head *old_bh,
6341                                      struct ocfs2_xattr_header *xh,
6342                                      struct buffer_head *new_bh,
6343                                      struct ocfs2_xattr_header *new_xh,
6344                                      struct ocfs2_xattr_value_buf *vb,
6345                                      struct ocfs2_alloc_context *meta_ac,
6346                                      get_xattr_value_root *func,
6347                                      void *para)
6348{
6349        int ret = 0, i, j;
6350        struct super_block *sb = args->old_inode->i_sb;
6351        struct buffer_head *value_bh;
6352        struct ocfs2_xattr_entry *xe, *last;
6353        struct ocfs2_xattr_value_root *xv, *new_xv;
6354        struct ocfs2_extent_tree data_et;
6355        u32 clusters, cpos, p_cluster, num_clusters;
6356        unsigned int ext_flags = 0;
6357
6358        trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6359                                         le16_to_cpu(xh->xh_count));
6360
6361        last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6362        for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6363                xe = &xh->xh_entries[i];
6364
6365                if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6366                        xe = &new_xh->xh_entries[j];
6367
6368                        le16_add_cpu(&new_xh->xh_count, -1);
6369                        if (new_xh->xh_count) {
6370                                memmove(xe, xe + 1,
6371                                        (void *)last - (void *)xe);
6372                                memset(last, 0,
6373                                       sizeof(struct ocfs2_xattr_entry));
6374                        }
6375
6376                        /*
6377                         * We don't want j to increase in the next round since
6378                         * it is already moved ahead.
6379                         */
6380                        j--;
6381                        continue;
6382                }
6383
6384                if (ocfs2_xattr_is_local(xe))
6385                        continue;
6386
6387                ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6388                if (ret) {
6389                        mlog_errno(ret);
6390                        break;
6391                }
6392
6393                ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6394                if (ret) {
6395                        mlog_errno(ret);
6396                        break;
6397                }
6398
6399                /*
6400                 * For the xattr which has l_tree_depth = 0, all the extent
6401                 * recs have already be copied to the new xh with the
6402                 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6403                 * increase the refount count int the refcount tree.
6404                 *
6405                 * For the xattr which has l_tree_depth > 0, we need
6406                 * to initialize it to the empty default value root,
6407                 * and then insert the extents one by one.
6408                 */
6409                if (xv->xr_list.l_tree_depth) {
6410                        memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE);
6411                        vb->vb_xv = new_xv;
6412                        vb->vb_bh = value_bh;
6413                        ocfs2_init_xattr_value_extent_tree(&data_et,
6414                                        INODE_CACHE(args->new_inode), vb);
6415                }
6416
6417                clusters = le32_to_cpu(xv->xr_clusters);
6418                cpos = 0;
6419                while (cpos < clusters) {
6420                        ret = ocfs2_xattr_get_clusters(args->old_inode,
6421                                                       cpos,
6422                                                       &p_cluster,
6423                                                       &num_clusters,
6424                                                       &xv->xr_list,
6425                                                       &ext_flags);
6426                        if (ret) {
6427                                mlog_errno(ret);
6428                                goto out;
6429                        }
6430
6431                        BUG_ON(!p_cluster);
6432
6433                        if (xv->xr_list.l_tree_depth) {
6434                                ret = ocfs2_insert_extent(handle,
6435                                                &data_et, cpos,
6436                                                ocfs2_clusters_to_blocks(
6437                                                        args->old_inode->i_sb,
6438                                                        p_cluster),
6439                                                num_clusters, ext_flags,
6440                                                meta_ac);
6441                                if (ret) {
6442                                        mlog_errno(ret);
6443                                        goto out;
6444                                }
6445                        }
6446
6447                        ret = ocfs2_increase_refcount(handle, args->ref_ci,
6448                                                      args->ref_root_bh,
6449                                                      p_cluster, num_clusters,
6450                                                      meta_ac, args->dealloc);
6451                        if (ret) {
6452                                mlog_errno(ret);
6453                                goto out;
6454                        }
6455
6456                        cpos += num_clusters;
6457                }
6458        }
6459
6460out:
6461        return ret;
6462}
6463
6464static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6465{
6466        int ret = 0, credits = 0;
6467        handle_t *handle;
6468        struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6469        struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6470        int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6471        int header_off = osb->sb->s_blocksize - inline_size;
6472        struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6473                                        (args->old_bh->b_data + header_off);
6474        struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6475                                        (args->new_bh->b_data + header_off);
6476        struct ocfs2_alloc_context *meta_ac = NULL;
6477        struct ocfs2_inode_info *new_oi;
6478        struct ocfs2_dinode *new_di;
6479        struct ocfs2_xattr_value_buf vb = {
6480                .vb_bh = args->new_bh,
6481                .vb_access = ocfs2_journal_access_di,
6482        };
6483
6484        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6485                                                  &credits, &meta_ac);
6486        if (ret) {
6487                mlog_errno(ret);
6488                goto out;
6489        }
6490
6491        handle = ocfs2_start_trans(osb, credits);
6492        if (IS_ERR(handle)) {
6493                ret = PTR_ERR(handle);
6494                mlog_errno(ret);
6495                goto out;
6496        }
6497
6498        ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6499                                      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6500        if (ret) {
6501                mlog_errno(ret);
6502                goto out_commit;
6503        }
6504
6505        memcpy(args->new_bh->b_data + header_off,
6506               args->old_bh->b_data + header_off, inline_size);
6507
6508        new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6509        new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6510
6511        ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6512                                         args->new_bh, new_xh, &vb, meta_ac,
6513                                         ocfs2_get_xattr_value_root, NULL);
6514        if (ret) {
6515                mlog_errno(ret);
6516                goto out_commit;
6517        }
6518
6519        new_oi = OCFS2_I(args->new_inode);
6520        /*
6521         * Adjust extent record count to reserve space for extended attribute.
6522         * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
6523         */
6524        if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
6525            !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
6526                struct ocfs2_extent_list *el = &new_di->id2.i_list;
6527                le16_add_cpu(&el->l_count, -(inline_size /
6528                                        sizeof(struct ocfs2_extent_rec)));
6529        }
6530        spin_lock(&new_oi->ip_lock);
6531        new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6532        new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6533        spin_unlock(&new_oi->ip_lock);
6534
6535        ocfs2_journal_dirty(handle, args->new_bh);
6536
6537out_commit:
6538        ocfs2_commit_trans(osb, handle);
6539
6540out:
6541        if (meta_ac)
6542                ocfs2_free_alloc_context(meta_ac);
6543        return ret;
6544}
6545
6546static int ocfs2_create_empty_xattr_block(struct inode *inode,
6547                                          struct buffer_head *fe_bh,
6548                                          struct buffer_head **ret_bh,
6549                                          int indexed)
6550{
6551        int ret;
6552        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6553        struct ocfs2_xattr_set_ctxt ctxt;
6554
6555        memset(&ctxt, 0, sizeof(ctxt));
6556        ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6557        if (ret < 0) {
6558                mlog_errno(ret);
6559                return ret;
6560        }
6561
6562        ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6563        if (IS_ERR(ctxt.handle)) {
6564                ret = PTR_ERR(ctxt.handle);
6565                mlog_errno(ret);
6566                goto out;
6567        }
6568
6569        trace_ocfs2_create_empty_xattr_block(
6570                                (unsigned long long)fe_bh->b_blocknr, indexed);
6571        ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6572                                       ret_bh);
6573        if (ret)
6574                mlog_errno(ret);
6575
6576        ocfs2_commit_trans(osb, ctxt.handle);
6577out:
6578        ocfs2_free_alloc_context(ctxt.meta_ac);
6579        return ret;
6580}
6581
6582static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6583                                     struct buffer_head *blk_bh,
6584                                     struct buffer_head *new_blk_bh)
6585{
6586        int ret = 0, credits = 0;
6587        handle_t *handle;
6588        struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6589        struct ocfs2_dinode *new_di;
6590        struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6591        int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6592        struct ocfs2_xattr_block *xb =
6593                        (struct ocfs2_xattr_block *)blk_bh->b_data;
6594        struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6595        struct ocfs2_xattr_block *new_xb =
6596                        (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6597        struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6598        struct ocfs2_alloc_context *meta_ac;
6599        struct ocfs2_xattr_value_buf vb = {
6600                .vb_bh = new_blk_bh,
6601                .vb_access = ocfs2_journal_access_xb,
6602        };
6603
6604        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6605                                                  &credits, &meta_ac);
6606        if (ret) {
6607                mlog_errno(ret);
6608                return ret;
6609        }
6610
6611        /* One more credits in case we need to add xattr flags in new inode. */
6612        handle = ocfs2_start_trans(osb, credits + 1);
6613        if (IS_ERR(handle)) {
6614                ret = PTR_ERR(handle);
6615                mlog_errno(ret);
6616                goto out;
6617        }
6618
6619        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6620                ret = ocfs2_journal_access_di(handle,
6621                                              INODE_CACHE(args->new_inode),
6622                                              args->new_bh,
6623                                              OCFS2_JOURNAL_ACCESS_WRITE);
6624                if (ret) {
6625                        mlog_errno(ret);
6626                        goto out_commit;
6627                }
6628        }
6629
6630        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6631                                      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6632        if (ret) {
6633                mlog_errno(ret);
6634                goto out_commit;
6635        }
6636
6637        memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6638               osb->sb->s_blocksize - header_off);
6639
6640        ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6641                                         new_blk_bh, new_xh, &vb, meta_ac,
6642                                         ocfs2_get_xattr_value_root, NULL);
6643        if (ret) {
6644                mlog_errno(ret);
6645                goto out_commit;
6646        }
6647
6648        ocfs2_journal_dirty(handle, new_blk_bh);
6649
6650        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6651                new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6652                spin_lock(&new_oi->ip_lock);
6653                new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6654                new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6655                spin_unlock(&new_oi->ip_lock);
6656
6657                ocfs2_journal_dirty(handle, args->new_bh);
6658        }
6659
6660out_commit:
6661        ocfs2_commit_trans(osb, handle);
6662
6663out:
6664        ocfs2_free_alloc_context(meta_ac);
6665        return ret;
6666}
6667
/*
 * Arguments shared by the helpers that reflink an indexed xattr tree.
 *
 * reflink:                the general xattr reflink state.
 * old_blk_bh/new_blk_bh:  buffer heads for the old and new side
 *                         (presumably the xattr root blocks - they are not
 *                         dereferenced by the helpers next to this
 *                         definition; verify against the caller).
 * old_bucket/new_bucket:  buckets that ocfs2_reflink_xattr_bucket() reads
 *                         and initializes for every bucket pair it copies.
 */
struct ocfs2_reflink_xattr_tree_args {
        struct ocfs2_xattr_reflink *reflink;
        struct buffer_head *old_blk_bh, *new_blk_bh;
        struct ocfs2_xattr_bucket *old_bucket, *new_bucket;
};
6675
6676/*
6677 * NOTE:
6678 * We have to handle the case that both old bucket and new bucket
6679 * will call this function to get the right ret_bh.
6680 * So The caller must give us the right bh.
6681 */
6682static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6683                                        struct buffer_head *bh,
6684                                        struct ocfs2_xattr_header *xh,
6685                                        int offset,
6686                                        struct ocfs2_xattr_value_root **xv,
6687                                        struct buffer_head **ret_bh,
6688                                        void *para)
6689{
6690        struct ocfs2_reflink_xattr_tree_args *args =
6691                        (struct ocfs2_reflink_xattr_tree_args *)para;
6692        struct ocfs2_xattr_bucket *bucket;
6693
6694        if (bh == args->old_bucket->bu_bhs[0])
6695                bucket = args->old_bucket;
6696        else
6697                bucket = args->new_bucket;
6698
6699        return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6700                                               xv, ret_bh);
6701}
6702
/*
 * Running totals filled in by ocfs2_calc_value_tree_metas() while the
 * buckets of an xattr extent record are iterated: metadata blocks,
 * journal credits and refcount records, in that order.
 */
struct ocfs2_value_tree_metas {
        int num_metas, credits, num_recs;
};
6708
/*
 * get_xattr_value_root implementation for counting value metadata inside a
 * bucket: @para is the struct ocfs2_xattr_bucket to look into, and the
 * lookup itself is done by ocfs2_get_xattr_tree_value_root().
 * @bh and @xh are unused.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        return ocfs2_get_xattr_tree_value_root(sb,
                                        (struct ocfs2_xattr_bucket *)para,
                                        offset, xv, ret_bh);
}
6723
6724static int ocfs2_calc_value_tree_metas(struct inode *inode,
6725                                      struct ocfs2_xattr_bucket *bucket,
6726                                      void *para)
6727{
6728        struct ocfs2_value_tree_metas *metas =
6729                        (struct ocfs2_value_tree_metas *)para;
6730        struct ocfs2_xattr_header *xh =
6731                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6732
6733        /* Add the credits for this bucket first. */
6734        metas->credits += bucket->bu_blocks;
6735        return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6736                                        xh, &metas->num_metas,
6737                                        &metas->credits, &metas->num_recs,
6738                                        ocfs2_value_tree_metas_in_bucket,
6739                                        bucket);
6740}
6741
6742/*
6743 * Given a xattr extent rec starting from blkno and having len clusters,
6744 * iterate all the buckets calculate how much metadata we need for reflinking
6745 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6746 */
6747static int ocfs2_lock_reflink_xattr_rec_allocators(
6748                                struct ocfs2_reflink_xattr_tree_args *args,
6749                                struct ocfs2_extent_tree *xt_et,
6750                                u64 blkno, u32 len, int *credits,
6751                                struct ocfs2_alloc_context **meta_ac,
6752                                struct ocfs2_alloc_context **data_ac)
6753{
6754        int ret, num_free_extents;
6755        struct ocfs2_value_tree_metas metas;
6756        struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6757        struct ocfs2_refcount_block *rb;
6758
6759        memset(&metas, 0, sizeof(metas));
6760
6761        ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6762                                          ocfs2_calc_value_tree_metas, &metas);
6763        if (ret) {
6764                mlog_errno(ret);
6765                goto out;
6766        }
6767
6768        *credits = metas.credits;
6769
6770        /*
6771         * Calculate we need for refcount tree change.
6772         *
6773         * We need to add/modify num_recs in refcount tree, so just calculate
6774         * an approximate number we need for refcount tree change.
6775         * Sometimes we need to split the tree, and after split,  half recs
6776         * will be moved to the new block, and a new block can only provide
6777         * half number of recs. So we multiple new blocks by 2.
6778         * In the end, we have to add credits for modifying the already
6779         * existed refcount block.
6780         */
6781        rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6782        metas.num_recs =
6783                (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6784                 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6785        metas.num_metas += metas.num_recs;
6786        *credits += metas.num_recs +
6787                    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6788        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6789                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6790                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6791        else
6792                *credits += 1;
6793
6794        /* count in the xattr tree change. */
6795        num_free_extents = ocfs2_num_free_extents(xt_et);
6796        if (num_free_extents < 0) {
6797                ret = num_free_extents;
6798                mlog_errno(ret);
6799                goto out;
6800        }
6801
6802        if (num_free_extents < len)
6803                metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6804
6805        *credits += ocfs2_calc_extend_credits(osb->sb,
6806                                              xt_et->et_root_el);
6807
6808        if (metas.num_metas) {
6809                ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6810                                                        meta_ac);
6811                if (ret) {
6812                        mlog_errno(ret);
6813                        goto out;
6814                }
6815        }
6816
6817        if (len) {
6818                ret = ocfs2_reserve_clusters(osb, len, data_ac);
6819                if (ret)
6820                        mlog_errno(ret);
6821        }
6822out:
6823        if (ret) {
6824                if (*meta_ac) {
6825                        ocfs2_free_alloc_context(*meta_ac);
6826                        *meta_ac = NULL;
6827                }
6828        }
6829
6830        return ret;
6831}
6832
6833static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6834                                u64 blkno, u64 new_blkno, u32 clusters,
6835                                u32 *cpos, int num_buckets,
6836                                struct ocfs2_alloc_context *meta_ac,
6837                                struct ocfs2_alloc_context *data_ac,
6838                                struct ocfs2_reflink_xattr_tree_args *args)
6839{
6840        int i, j, ret = 0;
6841        struct super_block *sb = args->reflink->old_inode->i_sb;
6842        int bpb = args->old_bucket->bu_blocks;
6843        struct ocfs2_xattr_value_buf vb = {
6844                .vb_access = ocfs2_journal_access,
6845        };
6846
6847        for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6848                ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6849                if (ret) {
6850                        mlog_errno(ret);
6851                        break;
6852                }
6853
6854                ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
6855                if (ret) {
6856                        mlog_errno(ret);
6857                        break;
6858                }
6859
6860                ret = ocfs2_xattr_bucket_journal_access(handle,
6861                                                args->new_bucket,
6862                                                OCFS2_JOURNAL_ACCESS_CREATE);
6863                if (ret) {
6864                        mlog_errno(ret);
6865                        break;
6866                }
6867
6868                for (j = 0; j < bpb; j++)
6869                        memcpy(bucket_block(args->new_bucket, j),
6870                               bucket_block(args->old_bucket, j),
6871                               sb->s_blocksize);
6872
6873                /*
6874                 * Record the start cpos so that we can use it to initialize
6875                 * our xattr tree we also set the xh_num_bucket for the new
6876                 * bucket.
6877                 */
6878                if (i == 0) {
6879                        *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6880                                            xh_entries[0].xe_name_hash);
6881                        bucket_xh(args->new_bucket)->xh_num_buckets =
6882                                cpu_to_le16(num_buckets);
6883                }
6884
6885                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6886
6887                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6888                                        args->old_bucket->bu_bhs[0],
6889                                        bucket_xh(args->old_bucket),
6890                                        args->new_bucket->bu_bhs[0],
6891                                        bucket_xh(args->new_bucket),
6892                                        &vb, meta_ac,
6893                                        ocfs2_get_reflink_xattr_value_root,
6894                                        args);
6895                if (ret) {
6896                        mlog_errno(ret);
6897                        break;
6898                }
6899
6900                /*
6901                 * Re-access and dirty the bucket to calculate metaecc.
6902                 * Because we may extend the transaction in reflink_xattr_header
6903                 * which will let the already accessed block gone.
6904                 */
6905                ret = ocfs2_xattr_bucket_journal_access(handle,
6906                                                args->new_bucket,
6907                                                OCFS2_JOURNAL_ACCESS_WRITE);
6908                if (ret) {
6909                        mlog_errno(ret);
6910                        break;
6911                }
6912
6913                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6914
6915                ocfs2_xattr_bucket_relse(args->old_bucket);
6916                ocfs2_xattr_bucket_relse(args->new_bucket);
6917        }
6918
6919        ocfs2_xattr_bucket_relse(args->old_bucket);
6920        ocfs2_xattr_bucket_relse(args->new_bucket);
6921        return ret;
6922}
6923
6924static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6925                                struct inode *inode,
6926                                struct ocfs2_reflink_xattr_tree_args *args,
6927                                struct ocfs2_extent_tree *et,
6928                                struct ocfs2_alloc_context *meta_ac,
6929                                struct ocfs2_alloc_context *data_ac,
6930                                u64 blkno, u32 cpos, u32 len)
6931{
6932        int ret, first_inserted = 0;
6933        u32 p_cluster, num_clusters, reflink_cpos = 0;
6934        u64 new_blkno;
6935        unsigned int num_buckets, reflink_buckets;
6936        unsigned int bpc =
6937                ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6938
6939        ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6940        if (ret) {
6941                mlog_errno(ret);
6942                goto out;
6943        }
6944        num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6945        ocfs2_xattr_bucket_relse(args->old_bucket);
6946
6947        while (len && num_buckets) {
6948                ret = ocfs2_claim_clusters(handle, data_ac,
6949                                           1, &p_cluster, &num_clusters);
6950                if (ret) {
6951                        mlog_errno(ret);
6952                        goto out;
6953                }
6954
6955                new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6956                reflink_buckets = min(num_buckets, bpc * num_clusters);
6957
6958                ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6959                                                 new_blkno, num_clusters,
6960                                                 &reflink_cpos, reflink_buckets,
6961                                                 meta_ac, data_ac, args);
6962                if (ret) {
6963                        mlog_errno(ret);
6964                        goto out;
6965                }
6966
6967                /*
6968                 * For the 1st allocated cluster, we make it use the same cpos
6969                 * so that the xattr tree looks the same as the original one
6970                 * in the most case.
6971                 */
6972                if (!first_inserted) {
6973                        reflink_cpos = cpos;
6974                        first_inserted = 1;
6975                }
6976                ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6977                                          num_clusters, 0, meta_ac);
6978                if (ret)
6979                        mlog_errno(ret);
6980
6981                trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
6982                                                  num_clusters, reflink_cpos);
6983
6984                len -= num_clusters;
6985                blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6986                num_buckets -= reflink_buckets;
6987        }
6988out:
6989        return ret;
6990}
6991
6992/*
6993 * Create the same xattr extent record in the new inode's xattr tree.
6994 */
6995static int ocfs2_reflink_xattr_rec(struct inode *inode,
6996                                   struct buffer_head *root_bh,
6997                                   u64 blkno,
6998                                   u32 cpos,
6999                                   u32 len,
7000                                   void *para)
7001{
7002        int ret, credits = 0;
7003        handle_t *handle;
7004        struct ocfs2_reflink_xattr_tree_args *args =
7005                        (struct ocfs2_reflink_xattr_tree_args *)para;
7006        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7007        struct ocfs2_alloc_context *meta_ac = NULL;
7008        struct ocfs2_alloc_context *data_ac = NULL;
7009        struct ocfs2_extent_tree et;
7010
7011        trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
7012
7013        ocfs2_init_xattr_tree_extent_tree(&et,
7014                                          INODE_CACHE(args->reflink->new_inode),
7015                                          args->new_blk_bh);
7016
7017        ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7018                                                      len, &credits,
7019                                                      &meta_ac, &data_ac);
7020        if (ret) {
7021                mlog_errno(ret);
7022                goto out;
7023        }
7024
7025        handle = ocfs2_start_trans(osb, credits);
7026        if (IS_ERR(handle)) {
7027                ret = PTR_ERR(handle);
7028                mlog_errno(ret);
7029                goto out;
7030        }
7031
7032        ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
7033                                          meta_ac, data_ac,
7034                                          blkno, cpos, len);
7035        if (ret)
7036                mlog_errno(ret);
7037
7038        ocfs2_commit_trans(osb, handle);
7039
7040out:
7041        if (meta_ac)
7042                ocfs2_free_alloc_context(meta_ac);
7043        if (data_ac)
7044                ocfs2_free_alloc_context(data_ac);
7045        return ret;
7046}
7047
7048/*
7049 * Create reflinked xattr buckets.
7050 * We will add bucket one by one, and refcount all the xattrs in the bucket
7051 * if they are stored outside.
7052 */
7053static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7054                                    struct buffer_head *blk_bh,
7055                                    struct buffer_head *new_blk_bh)
7056{
7057        int ret;
7058        struct ocfs2_reflink_xattr_tree_args para;
7059
7060        memset(&para, 0, sizeof(para));
7061        para.reflink = args;
7062        para.old_blk_bh = blk_bh;
7063        para.new_blk_bh = new_blk_bh;
7064
7065        para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7066        if (!para.old_bucket) {
7067                mlog_errno(-ENOMEM);
7068                return -ENOMEM;
7069        }
7070
7071        para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7072        if (!para.new_bucket) {
7073                ret = -ENOMEM;
7074                mlog_errno(ret);
7075                goto out;
7076        }
7077
7078        ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7079                                              ocfs2_reflink_xattr_rec,
7080                                              &para);
7081        if (ret)
7082                mlog_errno(ret);
7083
7084out:
7085        ocfs2_xattr_bucket_free(para.old_bucket);
7086        ocfs2_xattr_bucket_free(para.new_bucket);
7087        return ret;
7088}
7089
7090static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7091                                        struct buffer_head *blk_bh)
7092{
7093        int ret, indexed = 0;
7094        struct buffer_head *new_blk_bh = NULL;
7095        struct ocfs2_xattr_block *xb =
7096                        (struct ocfs2_xattr_block *)blk_bh->b_data;
7097
7098
7099        if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7100                indexed = 1;
7101
7102        ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7103                                             &new_blk_bh, indexed);
7104        if (ret) {
7105                mlog_errno(ret);
7106                goto out;
7107        }
7108
7109        if (!indexed)
7110                ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7111        else
7112                ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7113        if (ret)
7114                mlog_errno(ret);
7115
7116out:
7117        brelse(new_blk_bh);
7118        return ret;
7119}
7120
7121static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7122{
7123        int type = ocfs2_xattr_get_type(xe);
7124
7125        return type != OCFS2_XATTR_INDEX_SECURITY &&
7126               type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7127               type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7128}
7129
7130int ocfs2_reflink_xattrs(struct inode *old_inode,
7131                         struct buffer_head *old_bh,
7132                         struct inode *new_inode,
7133                         struct buffer_head *new_bh,
7134                         bool preserve_security)
7135{
7136        int ret;
7137        struct ocfs2_xattr_reflink args;
7138        struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7139        struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7140        struct buffer_head *blk_bh = NULL;
7141        struct ocfs2_cached_dealloc_ctxt dealloc;
7142        struct ocfs2_refcount_tree *ref_tree;
7143        struct buffer_head *ref_root_bh = NULL;
7144
7145        ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7146                                       le64_to_cpu(di->i_refcount_loc),
7147                                       1, &ref_tree, &ref_root_bh);
7148        if (ret) {
7149                mlog_errno(ret);
7150                goto out;
7151        }
7152
7153        ocfs2_init_dealloc_ctxt(&dealloc);
7154
7155        args.old_inode = old_inode;
7156        args.new_inode = new_inode;
7157        args.old_bh = old_bh;
7158        args.new_bh = new_bh;
7159        args.ref_ci = &ref_tree->rf_ci;
7160        args.ref_root_bh = ref_root_bh;
7161        args.dealloc = &dealloc;
7162        if (preserve_security)
7163                args.xattr_reflinked = NULL;
7164        else
7165                args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7166
7167        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7168                ret = ocfs2_reflink_xattr_inline(&args);
7169                if (ret) {
7170                        mlog_errno(ret);
7171                        goto out_unlock;
7172                }
7173        }
7174
7175        if (!di->i_xattr_loc)
7176                goto out_unlock;
7177
7178        ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7179                                     &blk_bh);
7180        if (ret < 0) {
7181                mlog_errno(ret);
7182                goto out_unlock;
7183        }
7184
7185        ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7186        if (ret)
7187                mlog_errno(ret);
7188
7189        brelse(blk_bh);
7190
7191out_unlock:
7192        ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7193                                   ref_tree, 1);
7194        brelse(ref_root_bh);
7195
7196        if (ocfs2_dealloc_has_cluster(&dealloc)) {
7197                ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7198                ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7199        }
7200
7201out:
7202        return ret;
7203}
7204
7205/*
7206 * Initialize security and acl for a already created inode.
7207 * Used for reflink a non-preserve-security file.
7208 *
7209 * It uses common api like ocfs2_xattr_set, so the caller
7210 * must not hold any lock expect i_mutex.
7211 */
7212int ocfs2_init_security_and_acl(struct inode *dir,
7213                                struct inode *inode,
7214                                const struct qstr *qstr)
7215{
7216        int ret = 0;
7217        struct buffer_head *dir_bh = NULL;
7218
7219        ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7220        if (ret) {
7221                mlog_errno(ret);
7222                goto leave;
7223        }
7224
7225        ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7226        if (ret) {
7227                mlog_errno(ret);
7228                goto leave;
7229        }
7230        ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7231        if (ret)
7232                mlog_errno(ret);
7233
7234        ocfs2_inode_unlock(dir, 0);
7235        brelse(dir_bh);
7236leave:
7237        return ret;
7238}
7239
7240/*
7241 * 'security' attributes support
7242 */
7243static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
7244                                    struct dentry *unused, struct inode *inode,
7245                                    const char *name, void *buffer, size_t size)
7246{
7247        return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY,
7248                               name, buffer, size);
7249}
7250
7251static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
7252                                    struct dentry *unused, struct inode *inode,
7253                                    const char *name, const void *value,
7254                                    size_t size, int flags)
7255{
7256        return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7257                               name, value, size, flags);
7258}
7259
7260static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
7261                     void *fs_info)
7262{
7263        const struct xattr *xattr;
7264        int err = 0;
7265
7266        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
7267                err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7268                                      xattr->name, xattr->value,
7269                                      xattr->value_len, XATTR_CREATE);
7270                if (err)
7271                        break;
7272        }
7273        return err;
7274}
7275
7276int ocfs2_init_security_get(struct inode *inode,
7277                            struct inode *dir,
7278                            const struct qstr *qstr,
7279                            struct ocfs2_security_xattr_info *si)
7280{
7281        /* check whether ocfs2 support feature xattr */
7282        if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7283                return -EOPNOTSUPP;
7284        if (si)
7285                return security_old_inode_init_security(inode, dir, qstr,
7286                                                        &si->name, &si->value,
7287                                                        &si->value_len);
7288
7289        return security_inode_init_security(inode, dir, qstr,
7290                                            &ocfs2_initxattrs, NULL);
7291}
7292
7293int ocfs2_init_security_set(handle_t *handle,
7294                            struct inode *inode,
7295                            struct buffer_head *di_bh,
7296                            struct ocfs2_security_xattr_info *si,
7297                            struct ocfs2_alloc_context *xattr_ac,
7298                            struct ocfs2_alloc_context *data_ac)
7299{
7300        return ocfs2_xattr_set_handle(handle, inode, di_bh,
7301                                     OCFS2_XATTR_INDEX_SECURITY,
7302                                     si->name, si->value, si->value_len, 0,
7303                                     xattr_ac, data_ac);
7304}
7305
7306const struct xattr_handler ocfs2_xattr_security_handler = {
7307        .prefix = XATTR_SECURITY_PREFIX,
7308        .get    = ocfs2_xattr_security_get,
7309        .set    = ocfs2_xattr_security_set,
7310};
7311
7312/*
7313 * 'trusted' attributes support
7314 */
7315static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
7316                                   struct dentry *unused, struct inode *inode,
7317                                   const char *name, void *buffer, size_t size)
7318{
7319        return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED,
7320                               name, buffer, size);
7321}
7322
7323static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
7324                                   struct dentry *unused, struct inode *inode,
7325                                   const char *name, const void *value,
7326                                   size_t size, int flags)
7327{
7328        return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED,
7329                               name, value, size, flags);
7330}
7331
7332const struct xattr_handler ocfs2_xattr_trusted_handler = {
7333        .prefix = XATTR_TRUSTED_PREFIX,
7334        .get    = ocfs2_xattr_trusted_get,
7335        .set    = ocfs2_xattr_trusted_set,
7336};
7337
7338/*
7339 * 'user' attributes support
7340 */
7341static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
7342                                struct dentry *unused, struct inode *inode,
7343                                const char *name, void *buffer, size_t size)
7344{
7345        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7346
7347        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7348                return -EOPNOTSUPP;
7349        return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
7350                               buffer, size);
7351}
7352
7353static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
7354                                struct dentry *unused, struct inode *inode,
7355                                const char *name, const void *value,
7356                                size_t size, int flags)
7357{
7358        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7359
7360        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7361                return -EOPNOTSUPP;
7362
7363        return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER,
7364                               name, value, size, flags);
7365}
7366
7367const struct xattr_handler ocfs2_xattr_user_handler = {
7368        .prefix = XATTR_USER_PREFIX,
7369        .get    = ocfs2_xattr_user_get,
7370        .set    = ocfs2_xattr_user_set,
7371};
7372