linux/fs/ocfs2/xattr.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * xattr.c
   5 *
   6 * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
   7 *
   8 * CREDITS:
    9 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
  10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  11 *
  12 * This program is free software; you can redistribute it and/or
  13 * modify it under the terms of the GNU General Public
  14 * License version 2 as published by the Free Software Foundation.
  15 *
  16 * This program is distributed in the hope that it will be useful,
  17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19 * General Public License for more details.
  20 */
  21
  22#include <linux/capability.h>
  23#include <linux/fs.h>
  24#include <linux/types.h>
  25#include <linux/slab.h>
  26#include <linux/highmem.h>
  27#include <linux/pagemap.h>
  28#include <linux/uio.h>
  29#include <linux/sched.h>
  30#include <linux/splice.h>
  31#include <linux/mount.h>
  32#include <linux/writeback.h>
  33#include <linux/falloc.h>
  34#include <linux/sort.h>
  35#include <linux/init.h>
  36#include <linux/module.h>
  37#include <linux/string.h>
  38#include <linux/security.h>
  39
  40#include <cluster/masklog.h>
  41
  42#include "ocfs2.h"
  43#include "alloc.h"
  44#include "blockcheck.h"
  45#include "dlmglue.h"
  46#include "file.h"
  47#include "symlink.h"
  48#include "sysfile.h"
  49#include "inode.h"
  50#include "journal.h"
  51#include "ocfs2_fs.h"
  52#include "suballoc.h"
  53#include "uptodate.h"
  54#include "buffer_head_io.h"
  55#include "super.h"
  56#include "xattr.h"
  57#include "refcounttree.h"
  58#include "acl.h"
  59#include "ocfs2_trace.h"
  60
  61struct ocfs2_xattr_def_value_root {
  62        struct ocfs2_xattr_value_root   xv;
  63        struct ocfs2_extent_rec         er;
  64};
  65
  66struct ocfs2_xattr_bucket {
  67        /* The inode these xattrs are associated with */
  68        struct inode *bu_inode;
  69
  70        /* The actual buffers that make up the bucket */
  71        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
  72
  73        /* How many blocks make up one bucket for this filesystem */
  74        int bu_blocks;
  75};
  76
  77struct ocfs2_xattr_set_ctxt {
  78        handle_t *handle;
  79        struct ocfs2_alloc_context *meta_ac;
  80        struct ocfs2_alloc_context *data_ac;
  81        struct ocfs2_cached_dealloc_ctxt dealloc;
  82        int set_abort;
  83};
  84
  85#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
  86#define OCFS2_XATTR_INLINE_SIZE 80
  87#define OCFS2_XATTR_HEADER_GAP  4
  88#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
  89                                         - sizeof(struct ocfs2_xattr_header) \
  90                                         - OCFS2_XATTR_HEADER_GAP)
  91#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
  92                                         - sizeof(struct ocfs2_xattr_block) \
  93                                         - sizeof(struct ocfs2_xattr_header) \
  94                                         - OCFS2_XATTR_HEADER_GAP)
  95
  96static struct ocfs2_xattr_def_value_root def_xv = {
  97        .xv.xr_list.l_count = cpu_to_le16(1),
  98};
  99
 100const struct xattr_handler *ocfs2_xattr_handlers[] = {
 101        &ocfs2_xattr_user_handler,
 102        &posix_acl_access_xattr_handler,
 103        &posix_acl_default_xattr_handler,
 104        &ocfs2_xattr_trusted_handler,
 105        &ocfs2_xattr_security_handler,
 106        NULL
 107};
 108
 109static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
 110        [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
 111        [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
 112                                        = &posix_acl_access_xattr_handler,
 113        [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
 114                                        = &posix_acl_default_xattr_handler,
 115        [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
 116        [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
 117};
 118
 119struct ocfs2_xattr_info {
 120        int             xi_name_index;
 121        const char      *xi_name;
 122        int             xi_name_len;
 123        const void      *xi_value;
 124        size_t          xi_value_len;
 125};
 126
 127struct ocfs2_xattr_search {
 128        struct buffer_head *inode_bh;
 129        /*
 130         * xattr_bh point to the block buffer head which has extended attribute
 131         * when extended attribute in inode, xattr_bh is equal to inode_bh.
 132         */
 133        struct buffer_head *xattr_bh;
 134        struct ocfs2_xattr_header *header;
 135        struct ocfs2_xattr_bucket *bucket;
 136        void *base;
 137        void *end;
 138        struct ocfs2_xattr_entry *here;
 139        int not_found;
 140};
 141
 142/* Operations on struct ocfs2_xa_entry */
 143struct ocfs2_xa_loc;
 144struct ocfs2_xa_loc_operations {
 145        /*
 146         * Journal functions
 147         */
 148        int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
 149                                  int type);
 150        void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);
 151
 152        /*
 153         * Return a pointer to the appropriate buffer in loc->xl_storage
 154         * at the given offset from loc->xl_header.
 155         */
 156        void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
 157
 158        /* Can we reuse the existing entry for the new value? */
 159        int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
 160                             struct ocfs2_xattr_info *xi);
 161
 162        /* How much space is needed for the new value? */
 163        int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
 164                               struct ocfs2_xattr_info *xi);
 165
 166        /*
 167         * Return the offset of the first name+value pair.  This is
 168         * the start of our downward-filling free space.
 169         */
 170        int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
 171
 172        /*
 173         * Remove the name+value at this location.  Do whatever is
 174         * appropriate with the remaining name+value pairs.
 175         */
 176        void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
 177
 178        /* Fill xl_entry with a new entry */
 179        void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
 180
 181        /* Add name+value storage to an entry */
 182        void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
 183
 184        /*
 185         * Initialize the value buf's access and bh fields for this entry.
 186         * ocfs2_xa_fill_value_buf() will handle the xv pointer.
 187         */
 188        void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
 189                                   struct ocfs2_xattr_value_buf *vb);
 190};
 191
 192/*
 193 * Describes an xattr entry location.  This is a memory structure
 194 * tracking the on-disk structure.
 195 */
 196struct ocfs2_xa_loc {
 197        /* This xattr belongs to this inode */
 198        struct inode *xl_inode;
 199
 200        /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
 201        struct ocfs2_xattr_header *xl_header;
 202
 203        /* Bytes from xl_header to the end of the storage */
 204        int xl_size;
 205
 206        /*
 207         * The ocfs2_xattr_entry this location describes.  If this is
 208         * NULL, this location describes the on-disk structure where it
 209         * would have been.
 210         */
 211        struct ocfs2_xattr_entry *xl_entry;
 212
 213        /*
 214         * Internal housekeeping
 215         */
 216
 217        /* Buffer(s) containing this entry */
 218        void *xl_storage;
 219
 220        /* Operations on the storage backing this location */
 221        const struct ocfs2_xa_loc_operations *xl_ops;
 222};
 223
 224/*
 225 * Convenience functions to calculate how much space is needed for a
 226 * given name+value pair
 227 */
 228static int namevalue_size(int name_len, uint64_t value_len)
 229{
 230        if (value_len > OCFS2_XATTR_INLINE_SIZE)
 231                return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 232        else
 233                return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
 234}
 235
 236static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
 237{
 238        return namevalue_size(xi->xi_name_len, xi->xi_value_len);
 239}
 240
 241static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
 242{
 243        u64 value_len = le64_to_cpu(xe->xe_value_size);
 244
 245        BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
 246               ocfs2_xattr_is_local(xe));
 247        return namevalue_size(xe->xe_name_len, value_len);
 248}
 249
 250
 251static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
 252                                             struct ocfs2_xattr_header *xh,
 253                                             int index,
 254                                             int *block_off,
 255                                             int *new_offset);
 256
 257static int ocfs2_xattr_block_find(struct inode *inode,
 258                                  int name_index,
 259                                  const char *name,
 260                                  struct ocfs2_xattr_search *xs);
 261static int ocfs2_xattr_index_block_find(struct inode *inode,
 262                                        struct buffer_head *root_bh,
 263                                        int name_index,
 264                                        const char *name,
 265                                        struct ocfs2_xattr_search *xs);
 266
 267static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 268                                        struct buffer_head *blk_bh,
 269                                        char *buffer,
 270                                        size_t buffer_size);
 271
 272static int ocfs2_xattr_create_index_block(struct inode *inode,
 273                                          struct ocfs2_xattr_search *xs,
 274                                          struct ocfs2_xattr_set_ctxt *ctxt);
 275
 276static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 277                                             struct ocfs2_xattr_info *xi,
 278                                             struct ocfs2_xattr_search *xs,
 279                                             struct ocfs2_xattr_set_ctxt *ctxt);
 280
 281typedef int (xattr_tree_rec_func)(struct inode *inode,
 282                                  struct buffer_head *root_bh,
 283                                  u64 blkno, u32 cpos, u32 len, void *para);
 284static int ocfs2_iterate_xattr_index_block(struct inode *inode,
 285                                           struct buffer_head *root_bh,
 286                                           xattr_tree_rec_func *rec_func,
 287                                           void *para);
 288static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 289                                        struct ocfs2_xattr_bucket *bucket,
 290                                        void *para);
 291static int ocfs2_rm_xattr_cluster(struct inode *inode,
 292                                  struct buffer_head *root_bh,
 293                                  u64 blkno,
 294                                  u32 cpos,
 295                                  u32 len,
 296                                  void *para);
 297
 298static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 299                                  u64 src_blk, u64 last_blk, u64 to_blk,
 300                                  unsigned int start_bucket,
 301                                  u32 *first_hash);
 302static int ocfs2_prepare_refcount_xattr(struct inode *inode,
 303                                        struct ocfs2_dinode *di,
 304                                        struct ocfs2_xattr_info *xi,
 305                                        struct ocfs2_xattr_search *xis,
 306                                        struct ocfs2_xattr_search *xbs,
 307                                        struct ocfs2_refcount_tree **ref_tree,
 308                                        int *meta_need,
 309                                        int *credits);
 310static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
 311                                           struct ocfs2_xattr_bucket *bucket,
 312                                           int offset,
 313                                           struct ocfs2_xattr_value_root **xv,
 314                                           struct buffer_head **bh);
 315
 316static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 317{
 318        return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
 319}
 320
 321static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
 322{
 323        return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
 324}
 325
 326#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
 327#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
 328#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 329
 330static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
 331{
 332        struct ocfs2_xattr_bucket *bucket;
 333        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 334
 335        BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
 336
 337        bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
 338        if (bucket) {
 339                bucket->bu_inode = inode;
 340                bucket->bu_blocks = blks;
 341        }
 342
 343        return bucket;
 344}
 345
 346static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
 347{
 348        int i;
 349
 350        for (i = 0; i < bucket->bu_blocks; i++) {
 351                brelse(bucket->bu_bhs[i]);
 352                bucket->bu_bhs[i] = NULL;
 353        }
 354}
 355
 356static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
 357{
 358        if (bucket) {
 359                ocfs2_xattr_bucket_relse(bucket);
 360                bucket->bu_inode = NULL;
 361                kfree(bucket);
 362        }
 363}
 364
 365/*
 366 * A bucket that has never been written to disk doesn't need to be
 367 * read.  We just need the buffer_heads.  Don't call this for
 368 * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
 369 * them fully.
 370 */
 371static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 372                                   u64 xb_blkno, int new)
 373{
 374        int i, rc = 0;
 375
 376        for (i = 0; i < bucket->bu_blocks; i++) {
 377                bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
 378                                              xb_blkno + i);
 379                if (!bucket->bu_bhs[i]) {
 380                        rc = -ENOMEM;
 381                        mlog_errno(rc);
 382                        break;
 383                }
 384
 385                if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 386                                           bucket->bu_bhs[i])) {
 387                        if (new)
 388                                ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 389                                                              bucket->bu_bhs[i]);
 390                        else {
 391                                set_buffer_uptodate(bucket->bu_bhs[i]);
 392                                ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 393                                                          bucket->bu_bhs[i]);
 394                        }
 395                }
 396        }
 397
 398        if (rc)
 399                ocfs2_xattr_bucket_relse(bucket);
 400        return rc;
 401}
 402
 403/* Read the xattr bucket at xb_blkno */
 404static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 405                                   u64 xb_blkno)
 406{
 407        int rc;
 408
 409        rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
 410                               bucket->bu_blocks, bucket->bu_bhs, 0,
 411                               NULL);
 412        if (!rc) {
 413                spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 414                rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
 415                                                 bucket->bu_bhs,
 416                                                 bucket->bu_blocks,
 417                                                 &bucket_xh(bucket)->xh_check);
 418                spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 419                if (rc)
 420                        mlog_errno(rc);
 421        }
 422
 423        if (rc)
 424                ocfs2_xattr_bucket_relse(bucket);
 425        return rc;
 426}
 427
 428static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 429                                             struct ocfs2_xattr_bucket *bucket,
 430                                             int type)
 431{
 432        int i, rc = 0;
 433
 434        for (i = 0; i < bucket->bu_blocks; i++) {
 435                rc = ocfs2_journal_access(handle,
 436                                          INODE_CACHE(bucket->bu_inode),
 437                                          bucket->bu_bhs[i], type);
 438                if (rc) {
 439                        mlog_errno(rc);
 440                        break;
 441                }
 442        }
 443
 444        return rc;
 445}
 446
 447static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 448                                             struct ocfs2_xattr_bucket *bucket)
 449{
 450        int i;
 451
 452        spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 453        ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
 454                                   bucket->bu_bhs, bucket->bu_blocks,
 455                                   &bucket_xh(bucket)->xh_check);
 456        spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 457
 458        for (i = 0; i < bucket->bu_blocks; i++)
 459                ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 460}
 461
 462static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
 463                                         struct ocfs2_xattr_bucket *src)
 464{
 465        int i;
 466        int blocksize = src->bu_inode->i_sb->s_blocksize;
 467
 468        BUG_ON(dest->bu_blocks != src->bu_blocks);
 469        BUG_ON(dest->bu_inode != src->bu_inode);
 470
 471        for (i = 0; i < src->bu_blocks; i++) {
 472                memcpy(bucket_block(dest, i), bucket_block(src, i),
 473                       blocksize);
 474        }
 475}
 476
 477static int ocfs2_validate_xattr_block(struct super_block *sb,
 478                                      struct buffer_head *bh)
 479{
 480        int rc;
 481        struct ocfs2_xattr_block *xb =
 482                (struct ocfs2_xattr_block *)bh->b_data;
 483
 484        trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
 485
 486        BUG_ON(!buffer_uptodate(bh));
 487
 488        /*
 489         * If the ecc fails, we return the error but otherwise
 490         * leave the filesystem running.  We know any error is
 491         * local to this block.
 492         */
 493        rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
 494        if (rc)
 495                return rc;
 496
 497        /*
 498         * Errors after here are fatal
 499         */
 500
 501        if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
 502                return ocfs2_error(sb,
 503                                   "Extended attribute block #%llu has bad signature %.*s\n",
 504                                   (unsigned long long)bh->b_blocknr, 7,
 505                                   xb->xb_signature);
 506        }
 507
 508        if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
 509                return ocfs2_error(sb,
 510                                   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
 511                                   (unsigned long long)bh->b_blocknr,
 512                                   (unsigned long long)le64_to_cpu(xb->xb_blkno));
 513        }
 514
 515        if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
 516                return ocfs2_error(sb,
 517                                   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
 518                                   (unsigned long long)bh->b_blocknr,
 519                                   le32_to_cpu(xb->xb_fs_generation));
 520        }
 521
 522        return 0;
 523}
 524
 525static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
 526                                  struct buffer_head **bh)
 527{
 528        int rc;
 529        struct buffer_head *tmp = *bh;
 530
 531        rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
 532                              ocfs2_validate_xattr_block);
 533
 534        /* If ocfs2_read_block() got us a new bh, pass it up. */
 535        if (!rc && !*bh)
 536                *bh = tmp;
 537
 538        return rc;
 539}
 540
 541static inline const char *ocfs2_xattr_prefix(int name_index)
 542{
 543        const struct xattr_handler *handler = NULL;
 544
 545        if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
 546                handler = ocfs2_xattr_handler_map[name_index];
 547
 548        return handler ? handler->prefix : NULL;
 549}
 550
 551static u32 ocfs2_xattr_name_hash(struct inode *inode,
 552                                 const char *name,
 553                                 int name_len)
 554{
 555        /* Get hash value of uuid from super block */
 556        u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
 557        int i;
 558
 559        /* hash extended attribute name */
 560        for (i = 0; i < name_len; i++) {
 561                hash = (hash << OCFS2_HASH_SHIFT) ^
 562                       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
 563                       *name++;
 564        }
 565
 566        return hash;
 567}
 568
 569static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
 570{
 571        return namevalue_size(name_len, value_len) +
 572                sizeof(struct ocfs2_xattr_entry);
 573}
 574
 575static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
 576{
 577        return namevalue_size_xi(xi) +
 578                sizeof(struct ocfs2_xattr_entry);
 579}
 580
 581static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
 582{
 583        return namevalue_size_xe(xe) +
 584                sizeof(struct ocfs2_xattr_entry);
 585}
 586
 587int ocfs2_calc_security_init(struct inode *dir,
 588                             struct ocfs2_security_xattr_info *si,
 589                             int *want_clusters,
 590                             int *xattr_credits,
 591                             struct ocfs2_alloc_context **xattr_ac)
 592{
 593        int ret = 0;
 594        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 595        int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 596                                                 si->value_len);
 597
 598        /*
 599         * The max space of security xattr taken inline is
 600         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 601         * So reserve one metadata block for it is ok.
 602         */
 603        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 604            s_size > OCFS2_XATTR_FREE_IN_IBODY) {
 605                ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
 606                if (ret) {
 607                        mlog_errno(ret);
 608                        return ret;
 609                }
 610                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 611        }
 612
 613        /* reserve clusters for xattr value which will be set in B tree*/
 614        if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 615                int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 616                                                            si->value_len);
 617
 618                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 619                                                           new_clusters);
 620                *want_clusters += new_clusters;
 621        }
 622        return ret;
 623}
 624
 625int ocfs2_calc_xattr_init(struct inode *dir,
 626                          struct buffer_head *dir_bh,
 627                          umode_t mode,
 628                          struct ocfs2_security_xattr_info *si,
 629                          int *want_clusters,
 630                          int *xattr_credits,
 631                          int *want_meta)
 632{
 633        int ret = 0;
 634        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 635        int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
 636
 637        if (si->enable)
 638                s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 639                                                     si->value_len);
 640
 641        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
 642                acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
 643                                        OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
 644                                        "", NULL, 0);
 645                if (acl_len > 0) {
 646                        a_size = ocfs2_xattr_entry_real_size(0, acl_len);
 647                        if (S_ISDIR(mode))
 648                                a_size <<= 1;
 649                } else if (acl_len != 0 && acl_len != -ENODATA) {
 650                        mlog_errno(ret);
 651                        return ret;
 652                }
 653        }
 654
 655        if (!(s_size + a_size))
 656                return ret;
 657
 658        /*
 659         * The max space of security xattr taken inline is
 660         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 661         * The max space of acl xattr taken inline is
 662         * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
 663         * when blocksize = 512, may reserve one more cluser for
 664         * xattr bucket, otherwise reserve one metadata block
 665         * for them is ok.
 666         * If this is a new directory with inline data,
 667         * we choose to reserve the entire inline area for
 668         * directory contents and force an external xattr block.
 669         */
 670        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 671            (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 672            (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
 673                *want_meta = *want_meta + 1;
 674                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 675        }
 676
 677        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
 678            (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
 679                *want_clusters += 1;
 680                *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
 681        }
 682
 683        /*
 684         * reserve credits and clusters for xattrs which has large value
 685         * and have to be set outside
 686         */
 687        if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 688                new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 689                                                        si->value_len);
 690                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 691                                                           new_clusters);
 692                *want_clusters += new_clusters;
 693        }
 694        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
 695            acl_len > OCFS2_XATTR_INLINE_SIZE) {
 696                /* for directory, it has DEFAULT and ACCESS two types of acls */
 697                new_clusters = (S_ISDIR(mode) ? 2 : 1) *
 698                                ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
 699                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 700                                                           new_clusters);
 701                *want_clusters += new_clusters;
 702        }
 703
 704        return ret;
 705}
 706
 707static int ocfs2_xattr_extend_allocation(struct inode *inode,
 708                                         u32 clusters_to_add,
 709                                         struct ocfs2_xattr_value_buf *vb,
 710                                         struct ocfs2_xattr_set_ctxt *ctxt)
 711{
 712        int status = 0, credits;
 713        handle_t *handle = ctxt->handle;
 714        enum ocfs2_alloc_restarted why;
 715        u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
 716        struct ocfs2_extent_tree et;
 717
 718        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 719
 720        while (clusters_to_add) {
 721                trace_ocfs2_xattr_extend_allocation(clusters_to_add);
 722
 723                status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 724                                       OCFS2_JOURNAL_ACCESS_WRITE);
 725                if (status < 0) {
 726                        mlog_errno(status);
 727                        break;
 728                }
 729
 730                prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 731                status = ocfs2_add_clusters_in_btree(handle,
 732                                                     &et,
 733                                                     &logical_start,
 734                                                     clusters_to_add,
 735                                                     0,
 736                                                     ctxt->data_ac,
 737                                                     ctxt->meta_ac,
 738                                                     &why);
 739                if ((status < 0) && (status != -EAGAIN)) {
 740                        if (status != -ENOSPC)
 741                                mlog_errno(status);
 742                        break;
 743                }
 744
 745                ocfs2_journal_dirty(handle, vb->vb_bh);
 746
 747                clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
 748                                         prev_clusters;
 749
 750                if (why != RESTART_NONE && clusters_to_add) {
 751                        /*
 752                         * We can only fail in case the alloc file doesn't give
 753                         * up enough clusters.
 754                         */
 755                        BUG_ON(why == RESTART_META);
 756
 757                        credits = ocfs2_calc_extend_credits(inode->i_sb,
 758                                                            &vb->vb_xv->xr_list);
 759                        status = ocfs2_extend_trans(handle, credits);
 760                        if (status < 0) {
 761                                status = -ENOMEM;
 762                                mlog_errno(status);
 763                                break;
 764                        }
 765                }
 766        }
 767
 768        return status;
 769}
 770
 771static int __ocfs2_remove_xattr_range(struct inode *inode,
 772                                      struct ocfs2_xattr_value_buf *vb,
 773                                      u32 cpos, u32 phys_cpos, u32 len,
 774                                      unsigned int ext_flags,
 775                                      struct ocfs2_xattr_set_ctxt *ctxt)
 776{
 777        int ret;
 778        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 779        handle_t *handle = ctxt->handle;
 780        struct ocfs2_extent_tree et;
 781
 782        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 783
 784        ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 785                            OCFS2_JOURNAL_ACCESS_WRITE);
 786        if (ret) {
 787                mlog_errno(ret);
 788                goto out;
 789        }
 790
 791        ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
 792                                  &ctxt->dealloc);
 793        if (ret) {
 794                mlog_errno(ret);
 795                goto out;
 796        }
 797
 798        le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
 799        ocfs2_journal_dirty(handle, vb->vb_bh);
 800
 801        if (ext_flags & OCFS2_EXT_REFCOUNTED)
 802                ret = ocfs2_decrease_refcount(inode, handle,
 803                                        ocfs2_blocks_to_clusters(inode->i_sb,
 804                                                                 phys_blkno),
 805                                        len, ctxt->meta_ac, &ctxt->dealloc, 1);
 806        else
 807                ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
 808                                                  phys_blkno, len);
 809        if (ret)
 810                mlog_errno(ret);
 811
 812out:
 813        return ret;
 814}
 815
 816static int ocfs2_xattr_shrink_size(struct inode *inode,
 817                                   u32 old_clusters,
 818                                   u32 new_clusters,
 819                                   struct ocfs2_xattr_value_buf *vb,
 820                                   struct ocfs2_xattr_set_ctxt *ctxt)
 821{
 822        int ret = 0;
 823        unsigned int ext_flags;
 824        u32 trunc_len, cpos, phys_cpos, alloc_size;
 825        u64 block;
 826
 827        if (old_clusters <= new_clusters)
 828                return 0;
 829
 830        cpos = new_clusters;
 831        trunc_len = old_clusters - new_clusters;
 832        while (trunc_len) {
 833                ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 834                                               &alloc_size,
 835                                               &vb->vb_xv->xr_list, &ext_flags);
 836                if (ret) {
 837                        mlog_errno(ret);
 838                        goto out;
 839                }
 840
 841                if (alloc_size > trunc_len)
 842                        alloc_size = trunc_len;
 843
 844                ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
 845                                                 phys_cpos, alloc_size,
 846                                                 ext_flags, ctxt);
 847                if (ret) {
 848                        mlog_errno(ret);
 849                        goto out;
 850                }
 851
 852                block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 853                ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
 854                                                       block, alloc_size);
 855                cpos += alloc_size;
 856                trunc_len -= alloc_size;
 857        }
 858
 859out:
 860        return ret;
 861}
 862
 863static int ocfs2_xattr_value_truncate(struct inode *inode,
 864                                      struct ocfs2_xattr_value_buf *vb,
 865                                      int len,
 866                                      struct ocfs2_xattr_set_ctxt *ctxt)
 867{
 868        int ret;
 869        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
 870        u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 871
 872        if (new_clusters == old_clusters)
 873                return 0;
 874
 875        if (new_clusters > old_clusters)
 876                ret = ocfs2_xattr_extend_allocation(inode,
 877                                                    new_clusters - old_clusters,
 878                                                    vb, ctxt);
 879        else
 880                ret = ocfs2_xattr_shrink_size(inode,
 881                                              old_clusters, new_clusters,
 882                                              vb, ctxt);
 883
 884        return ret;
 885}
 886
 887static int ocfs2_xattr_list_entry(char *buffer, size_t size,
 888                                  size_t *result, const char *prefix,
 889                                  const char *name, int name_len)
 890{
 891        char *p = buffer + *result;
 892        int prefix_len = strlen(prefix);
 893        int total_len = prefix_len + name_len + 1;
 894
 895        *result += total_len;
 896
 897        /* we are just looking for how big our buffer needs to be */
 898        if (!size)
 899                return 0;
 900
 901        if (*result > size)
 902                return -ERANGE;
 903
 904        memcpy(p, prefix, prefix_len);
 905        memcpy(p + prefix_len, name, name_len);
 906        p[prefix_len + name_len] = '\0';
 907
 908        return 0;
 909}
 910
 911static int ocfs2_xattr_list_entries(struct inode *inode,
 912                                    struct ocfs2_xattr_header *header,
 913                                    char *buffer, size_t buffer_size)
 914{
 915        size_t result = 0;
 916        int i, type, ret;
 917        const char *prefix, *name;
 918
 919        for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
 920                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 921                type = ocfs2_xattr_get_type(entry);
 922                prefix = ocfs2_xattr_prefix(type);
 923
 924                if (prefix) {
 925                        name = (const char *)header +
 926                                le16_to_cpu(entry->xe_name_offset);
 927
 928                        ret = ocfs2_xattr_list_entry(buffer, buffer_size,
 929                                                     &result, prefix, name,
 930                                                     entry->xe_name_len);
 931                        if (ret)
 932                                return ret;
 933                }
 934        }
 935
 936        return result;
 937}
 938
 939int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
 940                                         struct ocfs2_dinode *di)
 941{
 942        struct ocfs2_xattr_header *xh;
 943        int i;
 944
 945        xh = (struct ocfs2_xattr_header *)
 946                 ((void *)di + inode->i_sb->s_blocksize -
 947                 le16_to_cpu(di->i_xattr_inline_size));
 948
 949        for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
 950                if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
 951                        return 1;
 952
 953        return 0;
 954}
 955
 956static int ocfs2_xattr_ibody_list(struct inode *inode,
 957                                  struct ocfs2_dinode *di,
 958                                  char *buffer,
 959                                  size_t buffer_size)
 960{
 961        struct ocfs2_xattr_header *header = NULL;
 962        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 963        int ret = 0;
 964
 965        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
 966                return ret;
 967
 968        header = (struct ocfs2_xattr_header *)
 969                 ((void *)di + inode->i_sb->s_blocksize -
 970                 le16_to_cpu(di->i_xattr_inline_size));
 971
 972        ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
 973
 974        return ret;
 975}
 976
 977static int ocfs2_xattr_block_list(struct inode *inode,
 978                                  struct ocfs2_dinode *di,
 979                                  char *buffer,
 980                                  size_t buffer_size)
 981{
 982        struct buffer_head *blk_bh = NULL;
 983        struct ocfs2_xattr_block *xb;
 984        int ret = 0;
 985
 986        if (!di->i_xattr_loc)
 987                return ret;
 988
 989        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
 990                                     &blk_bh);
 991        if (ret < 0) {
 992                mlog_errno(ret);
 993                return ret;
 994        }
 995
 996        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
 997        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
 998                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
 999                ret = ocfs2_xattr_list_entries(inode, header,
1000                                               buffer, buffer_size);
1001        } else
1002                ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1003                                                   buffer, buffer_size);
1004
1005        brelse(blk_bh);
1006
1007        return ret;
1008}
1009
1010ssize_t ocfs2_listxattr(struct dentry *dentry,
1011                        char *buffer,
1012                        size_t size)
1013{
1014        int ret = 0, i_ret = 0, b_ret = 0;
1015        struct buffer_head *di_bh = NULL;
1016        struct ocfs2_dinode *di = NULL;
1017        struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
1018
1019        if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1020                return -EOPNOTSUPP;
1021
1022        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1023                return ret;
1024
1025        ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
1026        if (ret < 0) {
1027                mlog_errno(ret);
1028                return ret;
1029        }
1030
1031        di = (struct ocfs2_dinode *)di_bh->b_data;
1032
1033        down_read(&oi->ip_xattr_sem);
1034        i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
1035        if (i_ret < 0)
1036                b_ret = 0;
1037        else {
1038                if (buffer) {
1039                        buffer += i_ret;
1040                        size -= i_ret;
1041                }
1042                b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
1043                                               buffer, size);
1044                if (b_ret < 0)
1045                        i_ret = 0;
1046        }
1047        up_read(&oi->ip_xattr_sem);
1048        ocfs2_inode_unlock(d_inode(dentry), 0);
1049
1050        brelse(di_bh);
1051
1052        return i_ret + b_ret;
1053}
1054
1055static int ocfs2_xattr_find_entry(int name_index,
1056                                  const char *name,
1057                                  struct ocfs2_xattr_search *xs)
1058{
1059        struct ocfs2_xattr_entry *entry;
1060        size_t name_len;
1061        int i, cmp = 1;
1062
1063        if (name == NULL)
1064                return -EINVAL;
1065
1066        name_len = strlen(name);
1067        entry = xs->here;
1068        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1069                cmp = name_index - ocfs2_xattr_get_type(entry);
1070                if (!cmp)
1071                        cmp = name_len - entry->xe_name_len;
1072                if (!cmp)
1073                        cmp = memcmp(name, (xs->base +
1074                                     le16_to_cpu(entry->xe_name_offset)),
1075                                     name_len);
1076                if (cmp == 0)
1077                        break;
1078                entry += 1;
1079        }
1080        xs->here = entry;
1081
1082        return cmp ? -ENODATA : 0;
1083}
1084
1085static int ocfs2_xattr_get_value_outside(struct inode *inode,
1086                                         struct ocfs2_xattr_value_root *xv,
1087                                         void *buffer,
1088                                         size_t len)
1089{
1090        u32 cpos, p_cluster, num_clusters, bpc, clusters;
1091        u64 blkno;
1092        int i, ret = 0;
1093        size_t cplen, blocksize;
1094        struct buffer_head *bh = NULL;
1095        struct ocfs2_extent_list *el;
1096
1097        el = &xv->xr_list;
1098        clusters = le32_to_cpu(xv->xr_clusters);
1099        bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1100        blocksize = inode->i_sb->s_blocksize;
1101
1102        cpos = 0;
1103        while (cpos < clusters) {
1104                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1105                                               &num_clusters, el, NULL);
1106                if (ret) {
1107                        mlog_errno(ret);
1108                        goto out;
1109                }
1110
1111                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1112                /* Copy ocfs2_xattr_value */
1113                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1114                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1115                                               &bh, NULL);
1116                        if (ret) {
1117                                mlog_errno(ret);
1118                                goto out;
1119                        }
1120
1121                        cplen = len >= blocksize ? blocksize : len;
1122                        memcpy(buffer, bh->b_data, cplen);
1123                        len -= cplen;
1124                        buffer += cplen;
1125
1126                        brelse(bh);
1127                        bh = NULL;
1128                        if (len == 0)
1129                                break;
1130                }
1131                cpos += num_clusters;
1132        }
1133out:
1134        return ret;
1135}
1136
1137static int ocfs2_xattr_ibody_get(struct inode *inode,
1138                                 int name_index,
1139                                 const char *name,
1140                                 void *buffer,
1141                                 size_t buffer_size,
1142                                 struct ocfs2_xattr_search *xs)
1143{
1144        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1145        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1146        struct ocfs2_xattr_value_root *xv;
1147        size_t size;
1148        int ret = 0;
1149
1150        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1151                return -ENODATA;
1152
1153        xs->end = (void *)di + inode->i_sb->s_blocksize;
1154        xs->header = (struct ocfs2_xattr_header *)
1155                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1156        xs->base = (void *)xs->header;
1157        xs->here = xs->header->xh_entries;
1158
1159        ret = ocfs2_xattr_find_entry(name_index, name, xs);
1160        if (ret)
1161                return ret;
1162        size = le64_to_cpu(xs->here->xe_value_size);
1163        if (buffer) {
1164                if (size > buffer_size)
1165                        return -ERANGE;
1166                if (ocfs2_xattr_is_local(xs->here)) {
1167                        memcpy(buffer, (void *)xs->base +
1168                               le16_to_cpu(xs->here->xe_name_offset) +
1169                               OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1170                } else {
1171                        xv = (struct ocfs2_xattr_value_root *)
1172                                (xs->base + le16_to_cpu(
1173                                 xs->here->xe_name_offset) +
1174                                OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1175                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1176                                                            buffer, size);
1177                        if (ret < 0) {
1178                                mlog_errno(ret);
1179                                return ret;
1180                        }
1181                }
1182        }
1183
1184        return size;
1185}
1186
1187static int ocfs2_xattr_block_get(struct inode *inode,
1188                                 int name_index,
1189                                 const char *name,
1190                                 void *buffer,
1191                                 size_t buffer_size,
1192                                 struct ocfs2_xattr_search *xs)
1193{
1194        struct ocfs2_xattr_block *xb;
1195        struct ocfs2_xattr_value_root *xv;
1196        size_t size;
1197        int ret = -ENODATA, name_offset, name_len, i;
1198        int uninitialized_var(block_off);
1199
1200        xs->bucket = ocfs2_xattr_bucket_new(inode);
1201        if (!xs->bucket) {
1202                ret = -ENOMEM;
1203                mlog_errno(ret);
1204                goto cleanup;
1205        }
1206
1207        ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1208        if (ret) {
1209                mlog_errno(ret);
1210                goto cleanup;
1211        }
1212
1213        if (xs->not_found) {
1214                ret = -ENODATA;
1215                goto cleanup;
1216        }
1217
1218        xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1219        size = le64_to_cpu(xs->here->xe_value_size);
1220        if (buffer) {
1221                ret = -ERANGE;
1222                if (size > buffer_size)
1223                        goto cleanup;
1224
1225                name_offset = le16_to_cpu(xs->here->xe_name_offset);
1226                name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1227                i = xs->here - xs->header->xh_entries;
1228
1229                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1230                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1231                                                                bucket_xh(xs->bucket),
1232                                                                i,
1233                                                                &block_off,
1234                                                                &name_offset);
1235                        if (ret) {
1236                                mlog_errno(ret);
1237                                goto cleanup;
1238                        }
1239                        xs->base = bucket_block(xs->bucket, block_off);
1240                }
1241                if (ocfs2_xattr_is_local(xs->here)) {
1242                        memcpy(buffer, (void *)xs->base +
1243                               name_offset + name_len, size);
1244                } else {
1245                        xv = (struct ocfs2_xattr_value_root *)
1246                                (xs->base + name_offset + name_len);
1247                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1248                                                            buffer, size);
1249                        if (ret < 0) {
1250                                mlog_errno(ret);
1251                                goto cleanup;
1252                        }
1253                }
1254        }
1255        ret = size;
1256cleanup:
1257        ocfs2_xattr_bucket_free(xs->bucket);
1258
1259        brelse(xs->xattr_bh);
1260        xs->xattr_bh = NULL;
1261        return ret;
1262}
1263
1264int ocfs2_xattr_get_nolock(struct inode *inode,
1265                           struct buffer_head *di_bh,
1266                           int name_index,
1267                           const char *name,
1268                           void *buffer,
1269                           size_t buffer_size)
1270{
1271        int ret;
1272        struct ocfs2_dinode *di = NULL;
1273        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1274        struct ocfs2_xattr_search xis = {
1275                .not_found = -ENODATA,
1276        };
1277        struct ocfs2_xattr_search xbs = {
1278                .not_found = -ENODATA,
1279        };
1280
1281        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1282                return -EOPNOTSUPP;
1283
1284        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1285                return -ENODATA;
1286
1287        xis.inode_bh = xbs.inode_bh = di_bh;
1288        di = (struct ocfs2_dinode *)di_bh->b_data;
1289
1290        ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1291                                    buffer_size, &xis);
1292        if (ret == -ENODATA && di->i_xattr_loc)
1293                ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1294                                            buffer_size, &xbs);
1295
1296        return ret;
1297}
1298
1299/* ocfs2_xattr_get()
1300 *
1301 * Copy an extended attribute into the buffer provided.
1302 * Buffer is NULL to compute the size of buffer required.
1303 */
1304static int ocfs2_xattr_get(struct inode *inode,
1305                           int name_index,
1306                           const char *name,
1307                           void *buffer,
1308                           size_t buffer_size)
1309{
1310        int ret;
1311        struct buffer_head *di_bh = NULL;
1312
1313        ret = ocfs2_inode_lock(inode, &di_bh, 0);
1314        if (ret < 0) {
1315                mlog_errno(ret);
1316                return ret;
1317        }
1318        down_read(&OCFS2_I(inode)->ip_xattr_sem);
1319        ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1320                                     name, buffer, buffer_size);
1321        up_read(&OCFS2_I(inode)->ip_xattr_sem);
1322
1323        ocfs2_inode_unlock(inode, 0);
1324
1325        brelse(di_bh);
1326
1327        return ret;
1328}
1329
1330static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1331                                           handle_t *handle,
1332                                           struct ocfs2_xattr_value_buf *vb,
1333                                           const void *value,
1334                                           int value_len)
1335{
1336        int ret = 0, i, cp_len;
1337        u16 blocksize = inode->i_sb->s_blocksize;
1338        u32 p_cluster, num_clusters;
1339        u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1340        u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1341        u64 blkno;
1342        struct buffer_head *bh = NULL;
1343        unsigned int ext_flags;
1344        struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1345
1346        BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1347
1348        while (cpos < clusters) {
1349                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1350                                               &num_clusters, &xv->xr_list,
1351                                               &ext_flags);
1352                if (ret) {
1353                        mlog_errno(ret);
1354                        goto out;
1355                }
1356
1357                BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1358
1359                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1360
1361                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1362                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1363                                               &bh, NULL);
1364                        if (ret) {
1365                                mlog_errno(ret);
1366                                goto out;
1367                        }
1368
1369                        ret = ocfs2_journal_access(handle,
1370                                                   INODE_CACHE(inode),
1371                                                   bh,
1372                                                   OCFS2_JOURNAL_ACCESS_WRITE);
1373                        if (ret < 0) {
1374                                mlog_errno(ret);
1375                                goto out;
1376                        }
1377
1378                        cp_len = value_len > blocksize ? blocksize : value_len;
1379                        memcpy(bh->b_data, value, cp_len);
1380                        value_len -= cp_len;
1381                        value += cp_len;
1382                        if (cp_len < blocksize)
1383                                memset(bh->b_data + cp_len, 0,
1384                                       blocksize - cp_len);
1385
1386                        ocfs2_journal_dirty(handle, bh);
1387                        brelse(bh);
1388                        bh = NULL;
1389
1390                        /*
1391                         * XXX: do we need to empty all the following
1392                         * blocks in this cluster?
1393                         */
1394                        if (!value_len)
1395                                break;
1396                }
1397                cpos += num_clusters;
1398        }
1399out:
1400        brelse(bh);
1401
1402        return ret;
1403}
1404
1405static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1406                                       int num_entries)
1407{
1408        int free_space;
1409
1410        if (!needed_space)
1411                return 0;
1412
1413        free_space = free_start -
1414                sizeof(struct ocfs2_xattr_header) -
1415                (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1416                OCFS2_XATTR_HEADER_GAP;
1417        if (free_space < 0)
1418                return -EIO;
1419        if (free_space < needed_space)
1420                return -ENOSPC;
1421
1422        return 0;
1423}
1424
1425static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1426                                   int type)
1427{
1428        return loc->xl_ops->xlo_journal_access(handle, loc, type);
1429}
1430
1431static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1432{
1433        loc->xl_ops->xlo_journal_dirty(handle, loc);
1434}
1435
1436/* Give a pointer into the storage for the given offset */
1437static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1438{
1439        BUG_ON(offset >= loc->xl_size);
1440        return loc->xl_ops->xlo_offset_pointer(loc, offset);
1441}
1442
1443/*
1444 * Wipe the name+value pair and allow the storage to reclaim it.  This
1445 * must be followed by either removal of the entry or a call to
1446 * ocfs2_xa_add_namevalue().
1447 */
1448static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1449{
1450        loc->xl_ops->xlo_wipe_namevalue(loc);
1451}
1452
1453/*
1454 * Find lowest offset to a name+value pair.  This is the start of our
1455 * downward-growing free space.
1456 */
1457static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1458{
1459        return loc->xl_ops->xlo_get_free_start(loc);
1460}
1461
1462/* Can we reuse loc->xl_entry for xi? */
1463static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1464                                    struct ocfs2_xattr_info *xi)
1465{
1466        return loc->xl_ops->xlo_can_reuse(loc, xi);
1467}
1468
1469/* How much free space is needed to set the new value */
1470static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1471                                struct ocfs2_xattr_info *xi)
1472{
1473        return loc->xl_ops->xlo_check_space(loc, xi);
1474}
1475
1476static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1477{
1478        loc->xl_ops->xlo_add_entry(loc, name_hash);
1479        loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1480        /*
1481         * We can't leave the new entry's xe_name_offset at zero or
1482         * add_namevalue() will go nuts.  We set it to the size of our
1483         * storage so that it can never be less than any other entry.
1484         */
1485        loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1486}
1487
1488static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1489                                   struct ocfs2_xattr_info *xi)
1490{
1491        int size = namevalue_size_xi(xi);
1492        int nameval_offset;
1493        char *nameval_buf;
1494
1495        loc->xl_ops->xlo_add_namevalue(loc, size);
1496        loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1497        loc->xl_entry->xe_name_len = xi->xi_name_len;
1498        ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1499        ocfs2_xattr_set_local(loc->xl_entry,
1500                              xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1501
1502        nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1503        nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1504        memset(nameval_buf, 0, size);
1505        memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1506}
1507
1508static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1509                                    struct ocfs2_xattr_value_buf *vb)
1510{
1511        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1512        int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1513
1514        /* Value bufs are for value trees */
1515        BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1516        BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1517               (name_size + OCFS2_XATTR_ROOT_SIZE));
1518
1519        loc->xl_ops->xlo_fill_value_buf(loc, vb);
1520        vb->vb_xv =
1521                (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1522                                                        nameval_offset +
1523                                                        name_size);
1524}
1525
1526static int ocfs2_xa_block_journal_access(handle_t *handle,
1527                                         struct ocfs2_xa_loc *loc, int type)
1528{
1529        struct buffer_head *bh = loc->xl_storage;
1530        ocfs2_journal_access_func access;
1531
1532        if (loc->xl_size == (bh->b_size -
1533                             offsetof(struct ocfs2_xattr_block,
1534                                      xb_attrs.xb_header)))
1535                access = ocfs2_journal_access_xb;
1536        else
1537                access = ocfs2_journal_access_di;
1538        return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1539}
1540
1541static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1542                                         struct ocfs2_xa_loc *loc)
1543{
1544        struct buffer_head *bh = loc->xl_storage;
1545
1546        ocfs2_journal_dirty(handle, bh);
1547}
1548
1549static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1550                                           int offset)
1551{
1552        return (char *)loc->xl_header + offset;
1553}
1554
1555static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1556                                    struct ocfs2_xattr_info *xi)
1557{
1558        /*
1559         * Block storage is strict.  If the sizes aren't exact, we will
1560         * remove the old one and reinsert the new.
1561         */
1562        return namevalue_size_xe(loc->xl_entry) ==
1563                namevalue_size_xi(xi);
1564}
1565
1566static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1567{
1568        struct ocfs2_xattr_header *xh = loc->xl_header;
1569        int i, count = le16_to_cpu(xh->xh_count);
1570        int offset, free_start = loc->xl_size;
1571
1572        for (i = 0; i < count; i++) {
1573                offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1574                if (offset < free_start)
1575                        free_start = offset;
1576        }
1577
1578        return free_start;
1579}
1580
1581static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1582                                      struct ocfs2_xattr_info *xi)
1583{
1584        int count = le16_to_cpu(loc->xl_header->xh_count);
1585        int free_start = ocfs2_xa_get_free_start(loc);
1586        int needed_space = ocfs2_xi_entry_usage(xi);
1587
1588        /*
1589         * Block storage will reclaim the original entry before inserting
1590         * the new value, so we only need the difference.  If the new
1591         * entry is smaller than the old one, we don't need anything.
1592         */
1593        if (loc->xl_entry) {
1594                /* Don't need space if we're reusing! */
1595                if (ocfs2_xa_can_reuse_entry(loc, xi))
1596                        needed_space = 0;
1597                else
1598                        needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1599        }
1600        if (needed_space < 0)
1601                needed_space = 0;
1602        return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1603}
1604
1605/*
1606 * Block storage for xattrs keeps the name+value pairs compacted.  When
1607 * we remove one, we have to shift any that preceded it towards the end.
1608 */
1609static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1610{
1611        int i, offset;
1612        int namevalue_offset, first_namevalue_offset, namevalue_size;
1613        struct ocfs2_xattr_entry *entry = loc->xl_entry;
1614        struct ocfs2_xattr_header *xh = loc->xl_header;
1615        int count = le16_to_cpu(xh->xh_count);
1616
1617        namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1618        namevalue_size = namevalue_size_xe(entry);
1619        first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1620
1621        /* Shift the name+value pairs */
1622        memmove((char *)xh + first_namevalue_offset + namevalue_size,
1623                (char *)xh + first_namevalue_offset,
1624                namevalue_offset - first_namevalue_offset);
1625        memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1626
1627        /* Now tell xh->xh_entries about it */
1628        for (i = 0; i < count; i++) {
1629                offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1630                if (offset <= namevalue_offset)
1631                        le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1632                                     namevalue_size);
1633        }
1634
1635        /*
1636         * Note that we don't update xh_free_start or xh_name_value_len
1637         * because they're not used in block-stored xattrs.
1638         */
1639}
1640
1641static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1642{
1643        int count = le16_to_cpu(loc->xl_header->xh_count);
1644        loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1645        le16_add_cpu(&loc->xl_header->xh_count, 1);
1646        memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1647}
1648
1649static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1650{
1651        int free_start = ocfs2_xa_get_free_start(loc);
1652
1653        loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1654}
1655
1656static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1657                                          struct ocfs2_xattr_value_buf *vb)
1658{
1659        struct buffer_head *bh = loc->xl_storage;
1660
1661        if (loc->xl_size == (bh->b_size -
1662                             offsetof(struct ocfs2_xattr_block,
1663                                      xb_attrs.xb_header)))
1664                vb->vb_access = ocfs2_journal_access_xb;
1665        else
1666                vb->vb_access = ocfs2_journal_access_di;
1667        vb->vb_bh = bh;
1668}
1669
1670/*
1671 * Operations for xattrs stored in blocks.  This includes inline inode
1672 * storage and unindexed ocfs2_xattr_blocks.
1673 */
1674static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1675        .xlo_journal_access     = ocfs2_xa_block_journal_access,
1676        .xlo_journal_dirty      = ocfs2_xa_block_journal_dirty,
1677        .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1678        .xlo_check_space        = ocfs2_xa_block_check_space,
1679        .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1680        .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1681        .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1682        .xlo_add_entry          = ocfs2_xa_block_add_entry,
1683        .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1684        .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1685};
1686
1687static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1688                                          struct ocfs2_xa_loc *loc, int type)
1689{
1690        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1691
1692        return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1693}
1694
1695static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1696                                          struct ocfs2_xa_loc *loc)
1697{
1698        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1699
1700        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1701}
1702
1703static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1704                                            int offset)
1705{
1706        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1707        int block, block_offset;
1708
1709        /* The header is at the front of the bucket */
1710        block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1711        block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1712
1713        return bucket_block(bucket, block) + block_offset;
1714}
1715
1716static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1717                                     struct ocfs2_xattr_info *xi)
1718{
1719        return namevalue_size_xe(loc->xl_entry) >=
1720                namevalue_size_xi(xi);
1721}
1722
1723static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1724{
1725        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1726        return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1727}
1728
1729static int ocfs2_bucket_align_free_start(struct super_block *sb,
1730                                         int free_start, int size)
1731{
1732        /*
1733         * We need to make sure that the name+value pair fits within
1734         * one block.
1735         */
1736        if (((free_start - size) >> sb->s_blocksize_bits) !=
1737            ((free_start - 1) >> sb->s_blocksize_bits))
1738                free_start -= free_start % sb->s_blocksize;
1739
1740        return free_start;
1741}
1742
1743static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1744                                       struct ocfs2_xattr_info *xi)
1745{
1746        int rc;
1747        int count = le16_to_cpu(loc->xl_header->xh_count);
1748        int free_start = ocfs2_xa_get_free_start(loc);
1749        int needed_space = ocfs2_xi_entry_usage(xi);
1750        int size = namevalue_size_xi(xi);
1751        struct super_block *sb = loc->xl_inode->i_sb;
1752
1753        /*
1754         * Bucket storage does not reclaim name+value pairs it cannot
1755         * reuse.  They live as holes until the bucket fills, and then
1756         * the bucket is defragmented.  However, the bucket can reclaim
1757         * the ocfs2_xattr_entry.
1758         */
1759        if (loc->xl_entry) {
1760                /* Don't need space if we're reusing! */
1761                if (ocfs2_xa_can_reuse_entry(loc, xi))
1762                        needed_space = 0;
1763                else
1764                        needed_space -= sizeof(struct ocfs2_xattr_entry);
1765        }
1766        BUG_ON(needed_space < 0);
1767
1768        if (free_start < size) {
1769                if (needed_space)
1770                        return -ENOSPC;
1771        } else {
1772                /*
1773                 * First we check if it would fit in the first place.
1774                 * Below, we align the free start to a block.  This may
1775                 * slide us below the minimum gap.  By checking unaligned
1776                 * first, we avoid that error.
1777                 */
1778                rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1779                                                 count);
1780                if (rc)
1781                        return rc;
1782                free_start = ocfs2_bucket_align_free_start(sb, free_start,
1783                                                           size);
1784        }
1785        return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1786}
1787
1788static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1789{
1790        le16_add_cpu(&loc->xl_header->xh_name_value_len,
1791                     -namevalue_size_xe(loc->xl_entry));
1792}
1793
1794static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1795{
1796        struct ocfs2_xattr_header *xh = loc->xl_header;
1797        int count = le16_to_cpu(xh->xh_count);
1798        int low = 0, high = count - 1, tmp;
1799        struct ocfs2_xattr_entry *tmp_xe;
1800
1801        /*
1802         * We keep buckets sorted by name_hash, so we need to find
1803         * our insert place.
1804         */
1805        while (low <= high && count) {
1806                tmp = (low + high) / 2;
1807                tmp_xe = &xh->xh_entries[tmp];
1808
1809                if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1810                        low = tmp + 1;
1811                else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1812                        high = tmp - 1;
1813                else {
1814                        low = tmp;
1815                        break;
1816                }
1817        }
1818
1819        if (low != count)
1820                memmove(&xh->xh_entries[low + 1],
1821                        &xh->xh_entries[low],
1822                        ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1823
1824        le16_add_cpu(&xh->xh_count, 1);
1825        loc->xl_entry = &xh->xh_entries[low];
1826        memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1827}
1828
1829static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1830{
1831        int free_start = ocfs2_xa_get_free_start(loc);
1832        struct ocfs2_xattr_header *xh = loc->xl_header;
1833        struct super_block *sb = loc->xl_inode->i_sb;
1834        int nameval_offset;
1835
1836        free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1837        nameval_offset = free_start - size;
1838        loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1839        xh->xh_free_start = cpu_to_le16(nameval_offset);
1840        le16_add_cpu(&xh->xh_name_value_len, size);
1841
1842}
1843
1844static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1845                                           struct ocfs2_xattr_value_buf *vb)
1846{
1847        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1848        struct super_block *sb = loc->xl_inode->i_sb;
1849        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1850        int size = namevalue_size_xe(loc->xl_entry);
1851        int block_offset = nameval_offset >> sb->s_blocksize_bits;
1852
1853        /* Values are not allowed to straddle block boundaries */
1854        BUG_ON(block_offset !=
1855               ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1856        /* We expect the bucket to be filled in */
1857        BUG_ON(!bucket->bu_bhs[block_offset]);
1858
1859        vb->vb_access = ocfs2_journal_access;
1860        vb->vb_bh = bucket->bu_bhs[block_offset];
1861}
1862
1863/* Operations for xattrs stored in buckets. */
1864static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1865        .xlo_journal_access     = ocfs2_xa_bucket_journal_access,
1866        .xlo_journal_dirty      = ocfs2_xa_bucket_journal_dirty,
1867        .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1868        .xlo_check_space        = ocfs2_xa_bucket_check_space,
1869        .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1870        .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1871        .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1872        .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1873        .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1874        .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1875};
1876
1877static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1878{
1879        struct ocfs2_xattr_value_buf vb;
1880
1881        if (ocfs2_xattr_is_local(loc->xl_entry))
1882                return 0;
1883
1884        ocfs2_xa_fill_value_buf(loc, &vb);
1885        return le32_to_cpu(vb.vb_xv->xr_clusters);
1886}
1887
1888static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1889                                   struct ocfs2_xattr_set_ctxt *ctxt)
1890{
1891        int trunc_rc, access_rc;
1892        struct ocfs2_xattr_value_buf vb;
1893
1894        ocfs2_xa_fill_value_buf(loc, &vb);
1895        trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1896                                              ctxt);
1897
1898        /*
1899         * The caller of ocfs2_xa_value_truncate() has already called
1900         * ocfs2_xa_journal_access on the loc.  However, The truncate code
1901         * calls ocfs2_extend_trans().  This may commit the previous
1902         * transaction and open a new one.  If this is a bucket, truncate
1903         * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1904         * the caller is expecting to dirty the entire bucket.  So we must
1905         * reset the journal work.  We do this even if truncate has failed,
1906         * as it could have failed after committing the extend.
1907         */
1908        access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1909                                            OCFS2_JOURNAL_ACCESS_WRITE);
1910
1911        /* Errors in truncate take precedence */
1912        return trunc_rc ? trunc_rc : access_rc;
1913}
1914
1915static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1916{
1917        int index, count;
1918        struct ocfs2_xattr_header *xh = loc->xl_header;
1919        struct ocfs2_xattr_entry *entry = loc->xl_entry;
1920
1921        ocfs2_xa_wipe_namevalue(loc);
1922        loc->xl_entry = NULL;
1923
1924        le16_add_cpu(&xh->xh_count, -1);
1925        count = le16_to_cpu(xh->xh_count);
1926
1927        /*
1928         * Only zero out the entry if there are more remaining.  This is
1929         * important for an empty bucket, as it keeps track of the
1930         * bucket's hash value.  It doesn't hurt empty block storage.
1931         */
1932        if (count) {
1933                index = ((char *)entry - (char *)&xh->xh_entries) /
1934                        sizeof(struct ocfs2_xattr_entry);
1935                memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1936                        (count - index) * sizeof(struct ocfs2_xattr_entry));
1937                memset(&xh->xh_entries[count], 0,
1938                       sizeof(struct ocfs2_xattr_entry));
1939        }
1940}
1941
1942/*
1943 * If we have a problem adjusting the size of an external value during
1944 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1945 * in an intermediate state.  For example, the value may be partially
1946 * truncated.
1947 *
1948 * If the value tree hasn't changed, the extend/truncate went nowhere.
1949 * We have nothing to do.  The caller can treat it as a straight error.
1950 *
1951 * If the value tree got partially truncated, we now have a corrupted
1952 * extended attribute.  We're going to wipe its entry and leak the
1953 * clusters.  Better to leak some storage than leave a corrupt entry.
1954 *
1955 * If the value tree grew, it obviously didn't grow enough for the
1956 * new entry.  We're not going to try and reclaim those clusters either.
1957 * If there was already an external value there (orig_clusters != 0),
1958 * the new clusters are attached safely and we can just leave the old
1959 * value in place.  If there was no external value there, we remove
1960 * the entry.
1961 *
1962 * This way, the xattr block we store in the journal will be consistent.
1963 * If the size change broke because of the journal, no changes will hit
1964 * disk anyway.
1965 */
1966static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
1967                                            const char *what,
1968                                            unsigned int orig_clusters)
1969{
1970        unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
1971        char *nameval_buf = ocfs2_xa_offset_pointer(loc,
1972                                le16_to_cpu(loc->xl_entry->xe_name_offset));
1973
1974        if (new_clusters < orig_clusters) {
1975                mlog(ML_ERROR,
1976                     "Partial truncate while %s xattr %.*s.  Leaking "
1977                     "%u clusters and removing the entry\n",
1978                     what, loc->xl_entry->xe_name_len, nameval_buf,
1979                     orig_clusters - new_clusters);
1980                ocfs2_xa_remove_entry(loc);
1981        } else if (!orig_clusters) {
1982                mlog(ML_ERROR,
1983                     "Unable to allocate an external value for xattr "
1984                     "%.*s safely.  Leaking %u clusters and removing the "
1985                     "entry\n",
1986                     loc->xl_entry->xe_name_len, nameval_buf,
1987                     new_clusters - orig_clusters);
1988                ocfs2_xa_remove_entry(loc);
1989        } else if (new_clusters > orig_clusters)
1990                mlog(ML_ERROR,
1991                     "Unable to grow xattr %.*s safely.  %u new clusters "
1992                     "have been added, but the value will not be "
1993                     "modified\n",
1994                     loc->xl_entry->xe_name_len, nameval_buf,
1995                     new_clusters - orig_clusters);
1996}
1997
1998static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
1999                           struct ocfs2_xattr_set_ctxt *ctxt)
2000{
2001        int rc = 0;
2002        unsigned int orig_clusters;
2003
2004        if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2005                orig_clusters = ocfs2_xa_value_clusters(loc);
2006                rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2007                if (rc) {
2008                        mlog_errno(rc);
2009                        /*
2010                         * Since this is remove, we can return 0 if
2011                         * ocfs2_xa_cleanup_value_truncate() is going to
2012                         * wipe the entry anyway.  So we check the
2013                         * cluster count as well.
2014                         */
2015                        if (orig_clusters != ocfs2_xa_value_clusters(loc))
2016                                rc = 0;
2017                        ocfs2_xa_cleanup_value_truncate(loc, "removing",
2018                                                        orig_clusters);
2019                        if (rc)
2020                                goto out;
2021                }
2022        }
2023
2024        ocfs2_xa_remove_entry(loc);
2025
2026out:
2027        return rc;
2028}
2029
2030static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2031{
2032        int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2033        char *nameval_buf;
2034
2035        nameval_buf = ocfs2_xa_offset_pointer(loc,
2036                                le16_to_cpu(loc->xl_entry->xe_name_offset));
2037        memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2038}
2039
2040/*
2041 * Take an existing entry and make it ready for the new value.  This
2042 * won't allocate space, but it may free space.  It should be ready for
2043 * ocfs2_xa_prepare_entry() to finish the work.
2044 */
2045static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2046                                struct ocfs2_xattr_info *xi,
2047                                struct ocfs2_xattr_set_ctxt *ctxt)
2048{
2049        int rc = 0;
2050        int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2051        unsigned int orig_clusters;
2052        char *nameval_buf;
2053        int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2054        int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2055
2056        BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2057               name_size);
2058
2059        nameval_buf = ocfs2_xa_offset_pointer(loc,
2060                                le16_to_cpu(loc->xl_entry->xe_name_offset));
2061        if (xe_local) {
2062                memset(nameval_buf + name_size, 0,
2063                       namevalue_size_xe(loc->xl_entry) - name_size);
2064                if (!xi_local)
2065                        ocfs2_xa_install_value_root(loc);
2066        } else {
2067                orig_clusters = ocfs2_xa_value_clusters(loc);
2068                if (xi_local) {
2069                        rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2070                        if (rc < 0)
2071                                mlog_errno(rc);
2072                        else
2073                                memset(nameval_buf + name_size, 0,
2074                                       namevalue_size_xe(loc->xl_entry) -
2075                                       name_size);
2076                } else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2077                           xi->xi_value_len) {
2078                        rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2079                                                     ctxt);
2080                        if (rc < 0)
2081                                mlog_errno(rc);
2082                }
2083
2084                if (rc) {
2085                        ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2086                                                        orig_clusters);
2087                        goto out;
2088                }
2089        }
2090
2091        loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2092        ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2093
2094out:
2095        return rc;
2096}
2097
2098/*
2099 * Prepares loc->xl_entry to receive the new xattr.  This includes
2100 * properly setting up the name+value pair region.  If loc->xl_entry
2101 * already exists, it will take care of modifying it appropriately.
2102 *
2103 * Note that this modifies the data.  You did journal_access already,
2104 * right?
2105 */
2106static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2107                                  struct ocfs2_xattr_info *xi,
2108                                  u32 name_hash,
2109                                  struct ocfs2_xattr_set_ctxt *ctxt)
2110{
2111        int rc = 0;
2112        unsigned int orig_clusters;
2113        __le64 orig_value_size = 0;
2114
2115        rc = ocfs2_xa_check_space(loc, xi);
2116        if (rc)
2117                goto out;
2118
2119        if (loc->xl_entry) {
2120                if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2121                        orig_value_size = loc->xl_entry->xe_value_size;
2122                        rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2123                        if (rc)
2124                                goto out;
2125                        goto alloc_value;
2126                }
2127
2128                if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2129                        orig_clusters = ocfs2_xa_value_clusters(loc);
2130                        rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2131                        if (rc) {
2132                                mlog_errno(rc);
2133                                ocfs2_xa_cleanup_value_truncate(loc,
2134                                                                "overwriting",
2135                                                                orig_clusters);
2136                                goto out;
2137                        }
2138                }
2139                ocfs2_xa_wipe_namevalue(loc);
2140        } else
2141                ocfs2_xa_add_entry(loc, name_hash);
2142
2143        /*
2144         * If we get here, we have a blank entry.  Fill it.  We grow our
2145         * name+value pair back from the end.
2146         */
2147        ocfs2_xa_add_namevalue(loc, xi);
2148        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2149                ocfs2_xa_install_value_root(loc);
2150
2151alloc_value:
2152        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2153                orig_clusters = ocfs2_xa_value_clusters(loc);
2154                rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2155                if (rc < 0) {
2156                        ctxt->set_abort = 1;
2157                        ocfs2_xa_cleanup_value_truncate(loc, "growing",
2158                                                        orig_clusters);
2159                        /*
2160                         * If we were growing an existing value,
2161                         * ocfs2_xa_cleanup_value_truncate() won't remove
2162                         * the entry. We need to restore the original value
2163                         * size.
2164                         */
2165                        if (loc->xl_entry) {
2166                                BUG_ON(!orig_value_size);
2167                                loc->xl_entry->xe_value_size = orig_value_size;
2168                        }
2169                        mlog_errno(rc);
2170                }
2171        }
2172
2173out:
2174        return rc;
2175}
2176
2177/*
2178 * Store the value portion of the name+value pair.  This will skip
2179 * values that are stored externally.  Their tree roots were set up
2180 * by ocfs2_xa_prepare_entry().
2181 */
2182static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2183                                struct ocfs2_xattr_info *xi,
2184                                struct ocfs2_xattr_set_ctxt *ctxt)
2185{
2186        int rc = 0;
2187        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2188        int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2189        char *nameval_buf;
2190        struct ocfs2_xattr_value_buf vb;
2191
2192        nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2193        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2194                ocfs2_xa_fill_value_buf(loc, &vb);
2195                rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2196                                                     ctxt->handle, &vb,
2197                                                     xi->xi_value,
2198                                                     xi->xi_value_len);
2199        } else
2200                memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2201
2202        return rc;
2203}
2204
2205static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2206                        struct ocfs2_xattr_info *xi,
2207                        struct ocfs2_xattr_set_ctxt *ctxt)
2208{
2209        int ret;
2210        u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2211                                              xi->xi_name_len);
2212
2213        ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2214                                      OCFS2_JOURNAL_ACCESS_WRITE);
2215        if (ret) {
2216                mlog_errno(ret);
2217                goto out;
2218        }
2219
2220        /*
2221         * From here on out, everything is going to modify the buffer a
2222         * little.  Errors are going to leave the xattr header in a
2223         * sane state.  Thus, even with errors we dirty the sucker.
2224         */
2225
2226        /* Don't worry, we are never called with !xi_value and !xl_entry */
2227        if (!xi->xi_value) {
2228                ret = ocfs2_xa_remove(loc, ctxt);
2229                goto out_dirty;
2230        }
2231
2232        ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2233        if (ret) {
2234                if (ret != -ENOSPC)
2235                        mlog_errno(ret);
2236                goto out_dirty;
2237        }
2238
2239        ret = ocfs2_xa_store_value(loc, xi, ctxt);
2240        if (ret)
2241                mlog_errno(ret);
2242
2243out_dirty:
2244        ocfs2_xa_journal_dirty(ctxt->handle, loc);
2245
2246out:
2247        return ret;
2248}
2249
2250static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2251                                     struct inode *inode,
2252                                     struct buffer_head *bh,
2253                                     struct ocfs2_xattr_entry *entry)
2254{
2255        struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2256
2257        BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2258
2259        loc->xl_inode = inode;
2260        loc->xl_ops = &ocfs2_xa_block_loc_ops;
2261        loc->xl_storage = bh;
2262        loc->xl_entry = entry;
2263        loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2264        loc->xl_header =
2265                (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2266                                              loc->xl_size);
2267}
2268
2269static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2270                                          struct inode *inode,
2271                                          struct buffer_head *bh,
2272                                          struct ocfs2_xattr_entry *entry)
2273{
2274        struct ocfs2_xattr_block *xb =
2275                (struct ocfs2_xattr_block *)bh->b_data;
2276
2277        BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2278
2279        loc->xl_inode = inode;
2280        loc->xl_ops = &ocfs2_xa_block_loc_ops;
2281        loc->xl_storage = bh;
2282        loc->xl_header = &(xb->xb_attrs.xb_header);
2283        loc->xl_entry = entry;
2284        loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2285                                             xb_attrs.xb_header);
2286}
2287
2288static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2289                                           struct ocfs2_xattr_bucket *bucket,
2290                                           struct ocfs2_xattr_entry *entry)
2291{
2292        loc->xl_inode = bucket->bu_inode;
2293        loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2294        loc->xl_storage = bucket;
2295        loc->xl_header = bucket_xh(bucket);
2296        loc->xl_entry = entry;
2297        loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2298}
2299
2300/*
2301 * In xattr remove, if it is stored outside and refcounted, we may have
2302 * the chance to split the refcount tree. So need the allocators.
2303 */
2304static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2305                                        struct ocfs2_xattr_value_root *xv,
2306                                        struct ocfs2_caching_info *ref_ci,
2307                                        struct buffer_head *ref_root_bh,
2308                                        struct ocfs2_alloc_context **meta_ac,
2309                                        int *ref_credits)
2310{
2311        int ret, meta_add = 0;
2312        u32 p_cluster, num_clusters;
2313        unsigned int ext_flags;
2314
2315        *ref_credits = 0;
2316        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2317                                       &num_clusters,
2318                                       &xv->xr_list,
2319                                       &ext_flags);
2320        if (ret) {
2321                mlog_errno(ret);
2322                goto out;
2323        }
2324
2325        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2326                goto out;
2327
2328        ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2329                                                 ref_root_bh, xv,
2330                                                 &meta_add, ref_credits);
2331        if (ret) {
2332                mlog_errno(ret);
2333                goto out;
2334        }
2335
2336        ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2337                                                meta_add, meta_ac);
2338        if (ret)
2339                mlog_errno(ret);
2340
2341out:
2342        return ret;
2343}
2344
2345static int ocfs2_remove_value_outside(struct inode*inode,
2346                                      struct ocfs2_xattr_value_buf *vb,
2347                                      struct ocfs2_xattr_header *header,
2348                                      struct ocfs2_caching_info *ref_ci,
2349                                      struct buffer_head *ref_root_bh)
2350{
2351        int ret = 0, i, ref_credits;
2352        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2353        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2354        void *val;
2355
2356        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2357
2358        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2359                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2360
2361                if (ocfs2_xattr_is_local(entry))
2362                        continue;
2363
2364                val = (void *)header +
2365                        le16_to_cpu(entry->xe_name_offset);
2366                vb->vb_xv = (struct ocfs2_xattr_value_root *)
2367                        (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2368
2369                ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2370                                                         ref_ci, ref_root_bh,
2371                                                         &ctxt.meta_ac,
2372                                                         &ref_credits);
2373
2374                ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2375                                        ocfs2_remove_extent_credits(osb->sb));
2376                if (IS_ERR(ctxt.handle)) {
2377                        ret = PTR_ERR(ctxt.handle);
2378                        mlog_errno(ret);
2379                        break;
2380                }
2381
2382                ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2383
2384                ocfs2_commit_trans(osb, ctxt.handle);
2385                if (ctxt.meta_ac) {
2386                        ocfs2_free_alloc_context(ctxt.meta_ac);
2387                        ctxt.meta_ac = NULL;
2388                }
2389
2390                if (ret < 0) {
2391                        mlog_errno(ret);
2392                        break;
2393                }
2394
2395        }
2396
2397        if (ctxt.meta_ac)
2398                ocfs2_free_alloc_context(ctxt.meta_ac);
2399        ocfs2_schedule_truncate_log_flush(osb, 1);
2400        ocfs2_run_deallocs(osb, &ctxt.dealloc);
2401        return ret;
2402}
2403
2404static int ocfs2_xattr_ibody_remove(struct inode *inode,
2405                                    struct buffer_head *di_bh,
2406                                    struct ocfs2_caching_info *ref_ci,
2407                                    struct buffer_head *ref_root_bh)
2408{
2409
2410        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2411        struct ocfs2_xattr_header *header;
2412        int ret;
2413        struct ocfs2_xattr_value_buf vb = {
2414                .vb_bh = di_bh,
2415                .vb_access = ocfs2_journal_access_di,
2416        };
2417
2418        header = (struct ocfs2_xattr_header *)
2419                 ((void *)di + inode->i_sb->s_blocksize -
2420                 le16_to_cpu(di->i_xattr_inline_size));
2421
2422        ret = ocfs2_remove_value_outside(inode, &vb, header,
2423                                         ref_ci, ref_root_bh);
2424
2425        return ret;
2426}
2427
/*
 * Refcount tree context bundled into one pointer-sized argument.
 * ocfs2_xattr_block_remove() fills one of these and hands it to
 * ocfs2_iterate_xattr_index_block() together with the
 * ocfs2_rm_xattr_cluster callback.
 */
struct ocfs2_rm_xattr_bucket_para {
        struct ocfs2_caching_info *ref_ci;	/* refcount tree caching info */
        struct buffer_head *ref_root_bh;	/* refcount tree root buffer */
};
2432
2433static int ocfs2_xattr_block_remove(struct inode *inode,
2434                                    struct buffer_head *blk_bh,
2435                                    struct ocfs2_caching_info *ref_ci,
2436                                    struct buffer_head *ref_root_bh)
2437{
2438        struct ocfs2_xattr_block *xb;
2439        int ret = 0;
2440        struct ocfs2_xattr_value_buf vb = {
2441                .vb_bh = blk_bh,
2442                .vb_access = ocfs2_journal_access_xb,
2443        };
2444        struct ocfs2_rm_xattr_bucket_para args = {
2445                .ref_ci = ref_ci,
2446                .ref_root_bh = ref_root_bh,
2447        };
2448
2449        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2450        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2451                struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2452                ret = ocfs2_remove_value_outside(inode, &vb, header,
2453                                                 ref_ci, ref_root_bh);
2454        } else
2455                ret = ocfs2_iterate_xattr_index_block(inode,
2456                                                blk_bh,
2457                                                ocfs2_rm_xattr_cluster,
2458                                                &args);
2459
2460        return ret;
2461}
2462
2463static int ocfs2_xattr_free_block(struct inode *inode,
2464                                  u64 block,
2465                                  struct ocfs2_caching_info *ref_ci,
2466                                  struct buffer_head *ref_root_bh)
2467{
2468        struct inode *xb_alloc_inode;
2469        struct buffer_head *xb_alloc_bh = NULL;
2470        struct buffer_head *blk_bh = NULL;
2471        struct ocfs2_xattr_block *xb;
2472        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2473        handle_t *handle;
2474        int ret = 0;
2475        u64 blk, bg_blkno;
2476        u16 bit;
2477
2478        ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2479        if (ret < 0) {
2480                mlog_errno(ret);
2481                goto out;
2482        }
2483
2484        ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2485        if (ret < 0) {
2486                mlog_errno(ret);
2487                goto out;
2488        }
2489
2490        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2491        blk = le64_to_cpu(xb->xb_blkno);
2492        bit = le16_to_cpu(xb->xb_suballoc_bit);
2493        if (xb->xb_suballoc_loc)
2494                bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2495        else
2496                bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2497
2498        xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2499                                EXTENT_ALLOC_SYSTEM_INODE,
2500                                le16_to_cpu(xb->xb_suballoc_slot));
2501        if (!xb_alloc_inode) {
2502                ret = -ENOMEM;
2503                mlog_errno(ret);
2504                goto out;
2505        }
2506        mutex_lock(&xb_alloc_inode->i_mutex);
2507
2508        ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2509        if (ret < 0) {
2510                mlog_errno(ret);
2511                goto out_mutex;
2512        }
2513
2514        handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2515        if (IS_ERR(handle)) {
2516                ret = PTR_ERR(handle);
2517                mlog_errno(ret);
2518                goto out_unlock;
2519        }
2520
2521        ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2522                                       bit, bg_blkno, 1);
2523        if (ret < 0)
2524                mlog_errno(ret);
2525
2526        ocfs2_commit_trans(osb, handle);
2527out_unlock:
2528        ocfs2_inode_unlock(xb_alloc_inode, 1);
2529        brelse(xb_alloc_bh);
2530out_mutex:
2531        mutex_unlock(&xb_alloc_inode->i_mutex);
2532        iput(xb_alloc_inode);
2533out:
2534        brelse(blk_bh);
2535        return ret;
2536}
2537
2538/*
2539 * ocfs2_xattr_remove()
2540 *
2541 * Free extended attribute resources associated with this inode.
2542 */
2543int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2544{
2545        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2546        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2547        struct ocfs2_refcount_tree *ref_tree = NULL;
2548        struct buffer_head *ref_root_bh = NULL;
2549        struct ocfs2_caching_info *ref_ci = NULL;
2550        handle_t *handle;
2551        int ret;
2552
2553        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2554                return 0;
2555
2556        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2557                return 0;
2558
2559        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2560                ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2561                                               le64_to_cpu(di->i_refcount_loc),
2562                                               1, &ref_tree, &ref_root_bh);
2563                if (ret) {
2564                        mlog_errno(ret);
2565                        goto out;
2566                }
2567                ref_ci = &ref_tree->rf_ci;
2568
2569        }
2570
2571        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2572                ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2573                                               ref_ci, ref_root_bh);
2574                if (ret < 0) {
2575                        mlog_errno(ret);
2576                        goto out;
2577                }
2578        }
2579
2580        if (di->i_xattr_loc) {
2581                ret = ocfs2_xattr_free_block(inode,
2582                                             le64_to_cpu(di->i_xattr_loc),
2583                                             ref_ci, ref_root_bh);
2584                if (ret < 0) {
2585                        mlog_errno(ret);
2586                        goto out;
2587                }
2588        }
2589
2590        handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2591                                   OCFS2_INODE_UPDATE_CREDITS);
2592        if (IS_ERR(handle)) {
2593                ret = PTR_ERR(handle);
2594                mlog_errno(ret);
2595                goto out;
2596        }
2597        ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2598                                      OCFS2_JOURNAL_ACCESS_WRITE);
2599        if (ret) {
2600                mlog_errno(ret);
2601                goto out_commit;
2602        }
2603
2604        di->i_xattr_loc = 0;
2605
2606        spin_lock(&oi->ip_lock);
2607        oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2608        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2609        spin_unlock(&oi->ip_lock);
2610        ocfs2_update_inode_fsync_trans(handle, inode, 0);
2611
2612        ocfs2_journal_dirty(handle, di_bh);
2613out_commit:
2614        ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2615out:
2616        if (ref_tree)
2617                ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2618        brelse(ref_root_bh);
2619        return ret;
2620}
2621
2622static int ocfs2_xattr_has_space_inline(struct inode *inode,
2623                                        struct ocfs2_dinode *di)
2624{
2625        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2626        unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2627        int free;
2628
2629        if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2630                return 0;
2631
2632        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2633                struct ocfs2_inline_data *idata = &di->id2.i_data;
2634                free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2635        } else if (ocfs2_inode_is_fast_symlink(inode)) {
2636                free = ocfs2_fast_symlink_chars(inode->i_sb) -
2637                        le64_to_cpu(di->i_size);
2638        } else {
2639                struct ocfs2_extent_list *el = &di->id2.i_list;
2640                free = (le16_to_cpu(el->l_count) -
2641                        le16_to_cpu(el->l_next_free_rec)) *
2642                        sizeof(struct ocfs2_extent_rec);
2643        }
2644        if (free >= xattrsize)
2645                return 1;
2646
2647        return 0;
2648}
2649
2650/*
2651 * ocfs2_xattr_ibody_find()
2652 *
2653 * Find extended attribute in inode block and
2654 * fill search info into struct ocfs2_xattr_search.
2655 */
2656static int ocfs2_xattr_ibody_find(struct inode *inode,
2657                                  int name_index,
2658                                  const char *name,
2659                                  struct ocfs2_xattr_search *xs)
2660{
2661        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2662        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2663        int ret;
2664        int has_space = 0;
2665
2666        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2667                return 0;
2668
2669        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2670                down_read(&oi->ip_alloc_sem);
2671                has_space = ocfs2_xattr_has_space_inline(inode, di);
2672                up_read(&oi->ip_alloc_sem);
2673                if (!has_space)
2674                        return 0;
2675        }
2676
2677        xs->xattr_bh = xs->inode_bh;
2678        xs->end = (void *)di + inode->i_sb->s_blocksize;
2679        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2680                xs->header = (struct ocfs2_xattr_header *)
2681                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2682        else
2683                xs->header = (struct ocfs2_xattr_header *)
2684                        (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2685        xs->base = (void *)xs->header;
2686        xs->here = xs->header->xh_entries;
2687
2688        /* Find the named attribute. */
2689        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2690                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2691                if (ret && ret != -ENODATA)
2692                        return ret;
2693                xs->not_found = ret;
2694        }
2695
2696        return 0;
2697}
2698
2699static int ocfs2_xattr_ibody_init(struct inode *inode,
2700                                  struct buffer_head *di_bh,
2701                                  struct ocfs2_xattr_set_ctxt *ctxt)
2702{
2703        int ret;
2704        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2705        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2706        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2707        unsigned int xattrsize = osb->s_xattr_inline_size;
2708
2709        if (!ocfs2_xattr_has_space_inline(inode, di)) {
2710                ret = -ENOSPC;
2711                goto out;
2712        }
2713
2714        ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2715                                      OCFS2_JOURNAL_ACCESS_WRITE);
2716        if (ret) {
2717                mlog_errno(ret);
2718                goto out;
2719        }
2720
2721        /*
2722         * Adjust extent record count or inline data size
2723         * to reserve space for extended attribute.
2724         */
2725        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2726                struct ocfs2_inline_data *idata = &di->id2.i_data;
2727                le16_add_cpu(&idata->id_count, -xattrsize);
2728        } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2729                struct ocfs2_extent_list *el = &di->id2.i_list;
2730                le16_add_cpu(&el->l_count, -(xattrsize /
2731                                             sizeof(struct ocfs2_extent_rec)));
2732        }
2733        di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2734
2735        spin_lock(&oi->ip_lock);
2736        oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2737        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2738        spin_unlock(&oi->ip_lock);
2739
2740        ocfs2_journal_dirty(ctxt->handle, di_bh);
2741
2742out:
2743        return ret;
2744}
2745
2746/*
2747 * ocfs2_xattr_ibody_set()
2748 *
2749 * Set, replace or remove an extended attribute into inode block.
2750 *
2751 */
2752static int ocfs2_xattr_ibody_set(struct inode *inode,
2753                                 struct ocfs2_xattr_info *xi,
2754                                 struct ocfs2_xattr_search *xs,
2755                                 struct ocfs2_xattr_set_ctxt *ctxt)
2756{
2757        int ret;
2758        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2759        struct ocfs2_xa_loc loc;
2760
2761        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2762                return -ENOSPC;
2763
2764        down_write(&oi->ip_alloc_sem);
2765        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2766                ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2767                if (ret) {
2768                        if (ret != -ENOSPC)
2769                                mlog_errno(ret);
2770                        goto out;
2771                }
2772        }
2773
2774        ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2775                                 xs->not_found ? NULL : xs->here);
2776        ret = ocfs2_xa_set(&loc, xi, ctxt);
2777        if (ret) {
2778                if (ret != -ENOSPC)
2779                        mlog_errno(ret);
2780                goto out;
2781        }
2782        xs->here = loc.xl_entry;
2783
2784out:
2785        up_write(&oi->ip_alloc_sem);
2786
2787        return ret;
2788}
2789
2790/*
2791 * ocfs2_xattr_block_find()
2792 *
2793 * Find extended attribute in external block and
2794 * fill search info into struct ocfs2_xattr_search.
2795 */
2796static int ocfs2_xattr_block_find(struct inode *inode,
2797                                  int name_index,
2798                                  const char *name,
2799                                  struct ocfs2_xattr_search *xs)
2800{
2801        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2802        struct buffer_head *blk_bh = NULL;
2803        struct ocfs2_xattr_block *xb;
2804        int ret = 0;
2805
2806        if (!di->i_xattr_loc)
2807                return ret;
2808
2809        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2810                                     &blk_bh);
2811        if (ret < 0) {
2812                mlog_errno(ret);
2813                return ret;
2814        }
2815
2816        xs->xattr_bh = blk_bh;
2817        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2818
2819        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2820                xs->header = &xb->xb_attrs.xb_header;
2821                xs->base = (void *)xs->header;
2822                xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2823                xs->here = xs->header->xh_entries;
2824
2825                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2826        } else
2827                ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2828                                                   name_index,
2829                                                   name, xs);
2830
2831        if (ret && ret != -ENODATA) {
2832                xs->xattr_bh = NULL;
2833                goto cleanup;
2834        }
2835        xs->not_found = ret;
2836        return 0;
2837cleanup:
2838        brelse(blk_bh);
2839
2840        return ret;
2841}
2842
2843static int ocfs2_create_xattr_block(struct inode *inode,
2844                                    struct buffer_head *inode_bh,
2845                                    struct ocfs2_xattr_set_ctxt *ctxt,
2846                                    int indexed,
2847                                    struct buffer_head **ret_bh)
2848{
2849        int ret;
2850        u16 suballoc_bit_start;
2851        u32 num_got;
2852        u64 suballoc_loc, first_blkno;
2853        struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2854        struct buffer_head *new_bh = NULL;
2855        struct ocfs2_xattr_block *xblk;
2856
2857        ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2858                                      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2859        if (ret < 0) {
2860                mlog_errno(ret);
2861                goto end;
2862        }
2863
2864        ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2865                                   &suballoc_loc, &suballoc_bit_start,
2866                                   &num_got, &first_blkno);
2867        if (ret < 0) {
2868                mlog_errno(ret);
2869                goto end;
2870        }
2871
2872        new_bh = sb_getblk(inode->i_sb, first_blkno);
2873        if (!new_bh) {
2874                ret = -ENOMEM;
2875                mlog_errno(ret);
2876                goto end;
2877        }
2878
2879        ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2880
2881        ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2882                                      new_bh,
2883                                      OCFS2_JOURNAL_ACCESS_CREATE);
2884        if (ret < 0) {
2885                mlog_errno(ret);
2886                goto end;
2887        }
2888
2889        /* Initialize ocfs2_xattr_block */
2890        xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2891        memset(xblk, 0, inode->i_sb->s_blocksize);
2892        strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2893        xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2894        xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2895        xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2896        xblk->xb_fs_generation =
2897                cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2898        xblk->xb_blkno = cpu_to_le64(first_blkno);
2899        if (indexed) {
2900                struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2901                xr->xt_clusters = cpu_to_le32(1);
2902                xr->xt_last_eb_blk = 0;
2903                xr->xt_list.l_tree_depth = 0;
2904                xr->xt_list.l_count = cpu_to_le16(
2905                                        ocfs2_xattr_recs_per_xb(inode->i_sb));
2906                xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2907                xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2908        }
2909        ocfs2_journal_dirty(ctxt->handle, new_bh);
2910
2911        /* Add it to the inode */
2912        di->i_xattr_loc = cpu_to_le64(first_blkno);
2913
2914        spin_lock(&OCFS2_I(inode)->ip_lock);
2915        OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2916        di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2917        spin_unlock(&OCFS2_I(inode)->ip_lock);
2918
2919        ocfs2_journal_dirty(ctxt->handle, inode_bh);
2920
2921        *ret_bh = new_bh;
2922        new_bh = NULL;
2923
2924end:
2925        brelse(new_bh);
2926        return ret;
2927}
2928
2929/*
2930 * ocfs2_xattr_block_set()
2931 *
2932 * Set, replace or remove an extended attribute into external block.
2933 *
2934 */
2935static int ocfs2_xattr_block_set(struct inode *inode,
2936                                 struct ocfs2_xattr_info *xi,
2937                                 struct ocfs2_xattr_search *xs,
2938                                 struct ocfs2_xattr_set_ctxt *ctxt)
2939{
2940        struct buffer_head *new_bh = NULL;
2941        struct ocfs2_xattr_block *xblk = NULL;
2942        int ret;
2943        struct ocfs2_xa_loc loc;
2944
2945        if (!xs->xattr_bh) {
2946                ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2947                                               0, &new_bh);
2948                if (ret) {
2949                        mlog_errno(ret);
2950                        goto end;
2951                }
2952
2953                xs->xattr_bh = new_bh;
2954                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2955                xs->header = &xblk->xb_attrs.xb_header;
2956                xs->base = (void *)xs->header;
2957                xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2958                xs->here = xs->header->xh_entries;
2959        } else
2960                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2961
2962        if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2963                ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2964                                              xs->not_found ? NULL : xs->here);
2965
2966                ret = ocfs2_xa_set(&loc, xi, ctxt);
2967                if (!ret)
2968                        xs->here = loc.xl_entry;
2969                else if ((ret != -ENOSPC) || ctxt->set_abort)
2970                        goto end;
2971                else {
2972                        ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2973                        if (ret)
2974                                goto end;
2975                }
2976        }
2977
2978        if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
2979                ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2980
2981end:
2982        return ret;
2983}
2984
2985/* Check whether the new xattr can be inserted into the inode. */
2986static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2987                                       struct ocfs2_xattr_info *xi,
2988                                       struct ocfs2_xattr_search *xs)
2989{
2990        struct ocfs2_xattr_entry *last;
2991        int free, i;
2992        size_t min_offs = xs->end - xs->base;
2993
2994        if (!xs->header)
2995                return 0;
2996
2997        last = xs->header->xh_entries;
2998
2999        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
3000                size_t offs = le16_to_cpu(last->xe_name_offset);
3001                if (offs < min_offs)
3002                        min_offs = offs;
3003                last += 1;
3004        }
3005
3006        free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
3007        if (free < 0)
3008                return 0;
3009
3010        BUG_ON(!xs->not_found);
3011
3012        if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3013                return 1;
3014
3015        return 0;
3016}
3017
3018static int ocfs2_calc_xattr_set_need(struct inode *inode,
3019                                     struct ocfs2_dinode *di,
3020                                     struct ocfs2_xattr_info *xi,
3021                                     struct ocfs2_xattr_search *xis,
3022                                     struct ocfs2_xattr_search *xbs,
3023                                     int *clusters_need,
3024                                     int *meta_need,
3025                                     int *credits_need)
3026{
3027        int ret = 0, old_in_xb = 0;
3028        int clusters_add = 0, meta_add = 0, credits = 0;
3029        struct buffer_head *bh = NULL;
3030        struct ocfs2_xattr_block *xb = NULL;
3031        struct ocfs2_xattr_entry *xe = NULL;
3032        struct ocfs2_xattr_value_root *xv = NULL;
3033        char *base = NULL;
3034        int name_offset, name_len = 0;
3035        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3036                                                    xi->xi_value_len);
3037        u64 value_size;
3038
3039        /*
3040         * Calculate the clusters we need to write.
3041         * No matter whether we replace an old one or add a new one,
3042         * we need this for writing.
3043         */
3044        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3045                credits += new_clusters *
3046                           ocfs2_clusters_to_blocks(inode->i_sb, 1);
3047
3048        if (xis->not_found && xbs->not_found) {
3049                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3050
3051                if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3052                        clusters_add += new_clusters;
3053                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3054                                                        &def_xv.xv.xr_list);
3055                }
3056
3057                goto meta_guess;
3058        }
3059
3060        if (!xis->not_found) {
3061                xe = xis->here;
3062                name_offset = le16_to_cpu(xe->xe_name_offset);
3063                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3064                base = xis->base;
3065                credits += OCFS2_INODE_UPDATE_CREDITS;
3066        } else {
3067                int i, block_off = 0;
3068                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3069                xe = xbs->here;
3070                name_offset = le16_to_cpu(xe->xe_name_offset);
3071                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3072                i = xbs->here - xbs->header->xh_entries;
3073                old_in_xb = 1;
3074
3075                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3076                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3077                                                        bucket_xh(xbs->bucket),
3078                                                        i, &block_off,
3079                                                        &name_offset);
3080                        base = bucket_block(xbs->bucket, block_off);
3081                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3082                } else {
3083                        base = xbs->base;
3084                        credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3085                }
3086        }
3087
3088        /*
3089         * delete a xattr doesn't need metadata and cluster allocation.
3090         * so just calculate the credits and return.
3091         *
3092         * The credits for removing the value tree will be extended
3093         * by ocfs2_remove_extent itself.
3094         */
3095        if (!xi->xi_value) {
3096                if (!ocfs2_xattr_is_local(xe))
3097                        credits += ocfs2_remove_extent_credits(inode->i_sb);
3098
3099                goto out;
3100        }
3101
3102        /* do cluster allocation guess first. */
3103        value_size = le64_to_cpu(xe->xe_value_size);
3104
3105        if (old_in_xb) {
3106                /*
3107                 * In xattr set, we always try to set the xe in inode first,
3108                 * so if it can be inserted into inode successfully, the old
3109                 * one will be removed from the xattr block, and this xattr
3110                 * will be inserted into inode as a new xattr in inode.
3111                 */
3112                if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3113                        clusters_add += new_clusters;
3114                        credits += ocfs2_remove_extent_credits(inode->i_sb) +
3115                                    OCFS2_INODE_UPDATE_CREDITS;
3116                        if (!ocfs2_xattr_is_local(xe))
3117                                credits += ocfs2_calc_extend_credits(
3118                                                        inode->i_sb,
3119                                                        &def_xv.xv.xr_list);
3120                        goto out;
3121                }
3122        }
3123
3124        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3125                /* the new values will be stored outside. */
3126                u32 old_clusters = 0;
3127
3128                if (!ocfs2_xattr_is_local(xe)) {
3129                        old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3130                                                                 value_size);
3131                        xv = (struct ocfs2_xattr_value_root *)
3132                             (base + name_offset + name_len);
3133                        value_size = OCFS2_XATTR_ROOT_SIZE;
3134                } else
3135                        xv = &def_xv.xv;
3136
3137                if (old_clusters >= new_clusters) {
3138                        credits += ocfs2_remove_extent_credits(inode->i_sb);
3139                        goto out;
3140                } else {
3141                        meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3142                        clusters_add += new_clusters - old_clusters;
3143                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3144                                                             &xv->xr_list);
3145                        if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3146                                goto out;
3147                }
3148        } else {
3149                /*
3150                 * Now the new value will be stored inside. So if the new
3151                 * value is smaller than the size of value root or the old
3152                 * value, we don't need any allocation, otherwise we have
3153                 * to guess metadata allocation.
3154                 */
3155                if ((ocfs2_xattr_is_local(xe) &&
3156                     (value_size >= xi->xi_value_len)) ||
3157                    (!ocfs2_xattr_is_local(xe) &&
3158                     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3159                        goto out;
3160        }
3161
3162meta_guess:
3163        /* calculate metadata allocation. */
3164        if (di->i_xattr_loc) {
3165                if (!xbs->xattr_bh) {
3166                        ret = ocfs2_read_xattr_block(inode,
3167                                                     le64_to_cpu(di->i_xattr_loc),
3168                                                     &bh);
3169                        if (ret) {
3170                                mlog_errno(ret);
3171                                goto out;
3172                        }
3173
3174                        xb = (struct ocfs2_xattr_block *)bh->b_data;
3175                } else
3176                        xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3177
3178                /*
3179                 * If there is already an xattr tree, good, we can calculate
3180                 * like other b-trees. Otherwise we may have the chance of
3181                 * create a tree, the credit calculation is borrowed from
3182                 * ocfs2_calc_extend_credits with root_el = NULL. And the
3183                 * new tree will be cluster based, so no meta is needed.
3184                 */
3185                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3186                        struct ocfs2_extent_list *el =
3187                                 &xb->xb_attrs.xb_root.xt_list;
3188                        meta_add += ocfs2_extend_meta_needed(el);
3189                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3190                                                             el);
3191                } else
3192                        credits += OCFS2_SUBALLOC_ALLOC + 1;
3193
3194                /*
3195                 * This cluster will be used either for new bucket or for
3196                 * new xattr block.
3197                 * If the cluster size is the same as the bucket size, one
3198                 * more is needed since we may need to extend the bucket
3199                 * also.
3200                 */
3201                clusters_add += 1;
3202                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3203                if (OCFS2_XATTR_BUCKET_SIZE ==
3204                        OCFS2_SB(inode->i_sb)->s_clustersize) {
3205                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3206                        clusters_add += 1;
3207                }
3208        } else {
3209                credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3210                if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3211                        struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
3212                        meta_add += ocfs2_extend_meta_needed(el);
3213                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3214                                                             el);
3215                } else {
3216                        meta_add += 1;
3217                }
3218        }
3219out:
3220        if (clusters_need)
3221                *clusters_need = clusters_add;
3222        if (meta_need)
3223                *meta_need = meta_add;
3224        if (credits_need)
3225                *credits_need = credits;
3226        brelse(bh);
3227        return ret;
3228}
3229
3230static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3231                                     struct ocfs2_dinode *di,
3232                                     struct ocfs2_xattr_info *xi,
3233                                     struct ocfs2_xattr_search *xis,
3234                                     struct ocfs2_xattr_search *xbs,
3235                                     struct ocfs2_xattr_set_ctxt *ctxt,
3236                                     int extra_meta,
3237                                     int *credits)
3238{
3239        int clusters_add, meta_add, ret;
3240        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3241
3242        memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3243
3244        ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3245
3246        ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3247                                        &clusters_add, &meta_add, credits);
3248        if (ret) {
3249                mlog_errno(ret);
3250                return ret;
3251        }
3252
3253        meta_add += extra_meta;
3254        trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3255                                        clusters_add, *credits);
3256
3257        if (meta_add) {
3258                ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3259                                                        &ctxt->meta_ac);
3260                if (ret) {
3261                        mlog_errno(ret);
3262                        goto out;
3263                }
3264        }
3265
3266        if (clusters_add) {
3267                ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3268                if (ret)
3269                        mlog_errno(ret);
3270        }
3271out:
3272        if (ret) {
3273                if (ctxt->meta_ac) {
3274                        ocfs2_free_alloc_context(ctxt->meta_ac);
3275                        ctxt->meta_ac = NULL;
3276                }
3277
3278                /*
3279                 * We cannot have an error and a non null ctxt->data_ac.
3280                 */
3281        }
3282
3283        return ret;
3284}
3285
3286static int __ocfs2_xattr_set_handle(struct inode *inode,
3287                                    struct ocfs2_dinode *di,
3288                                    struct ocfs2_xattr_info *xi,
3289                                    struct ocfs2_xattr_search *xis,
3290                                    struct ocfs2_xattr_search *xbs,
3291                                    struct ocfs2_xattr_set_ctxt *ctxt)
3292{
3293        int ret = 0, credits, old_found;
3294
3295        if (!xi->xi_value) {
3296                /* Remove existing extended attribute */
3297                if (!xis->not_found)
3298                        ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3299                else if (!xbs->not_found)
3300                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3301        } else {
3302                /* We always try to set extended attribute into inode first*/
3303                ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3304                if (!ret && !xbs->not_found) {
3305                        /*
3306                         * If succeed and that extended attribute existing in
3307                         * external block, then we will remove it.
3308                         */
3309                        xi->xi_value = NULL;
3310                        xi->xi_value_len = 0;
3311
3312                        old_found = xis->not_found;
3313                        xis->not_found = -ENODATA;
3314                        ret = ocfs2_calc_xattr_set_need(inode,
3315                                                        di,
3316                                                        xi,
3317                                                        xis,
3318                                                        xbs,
3319                                                        NULL,
3320                                                        NULL,
3321                                                        &credits);
3322                        xis->not_found = old_found;
3323                        if (ret) {
3324                                mlog_errno(ret);
3325                                goto out;
3326                        }
3327
3328                        ret = ocfs2_extend_trans(ctxt->handle, credits);
3329                        if (ret) {
3330                                mlog_errno(ret);
3331                                goto out;
3332                        }
3333                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3334                } else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3335                        if (di->i_xattr_loc && !xbs->xattr_bh) {
3336                                ret = ocfs2_xattr_block_find(inode,
3337                                                             xi->xi_name_index,
3338                                                             xi->xi_name, xbs);
3339                                if (ret)
3340                                        goto out;
3341
3342                                old_found = xis->not_found;
3343                                xis->not_found = -ENODATA;
3344                                ret = ocfs2_calc_xattr_set_need(inode,
3345                                                                di,
3346                                                                xi,
3347                                                                xis,
3348                                                                xbs,
3349                                                                NULL,
3350                                                                NULL,
3351                                                                &credits);
3352                                xis->not_found = old_found;
3353                                if (ret) {
3354                                        mlog_errno(ret);
3355                                        goto out;
3356                                }
3357
3358                                ret = ocfs2_extend_trans(ctxt->handle, credits);
3359                                if (ret) {
3360                                        mlog_errno(ret);
3361                                        goto out;
3362                                }
3363                        }
3364                        /*
3365                         * If no space in inode, we will set extended attribute
3366                         * into external block.
3367                         */
3368                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3369                        if (ret)
3370                                goto out;
3371                        if (!xis->not_found) {
3372                                /*
3373                                 * If succeed and that extended attribute
3374                                 * existing in inode, we will remove it.
3375                                 */
3376                                xi->xi_value = NULL;
3377                                xi->xi_value_len = 0;
3378                                xbs->not_found = -ENODATA;
3379                                ret = ocfs2_calc_xattr_set_need(inode,
3380                                                                di,
3381                                                                xi,
3382                                                                xis,
3383                                                                xbs,
3384                                                                NULL,
3385                                                                NULL,
3386                                                                &credits);
3387                                if (ret) {
3388                                        mlog_errno(ret);
3389                                        goto out;
3390                                }
3391
3392                                ret = ocfs2_extend_trans(ctxt->handle, credits);
3393                                if (ret) {
3394                                        mlog_errno(ret);
3395                                        goto out;
3396                                }
3397                                ret = ocfs2_xattr_ibody_set(inode, xi,
3398                                                            xis, ctxt);
3399                        }
3400                }
3401        }
3402
3403        if (!ret) {
3404                /* Update inode ctime. */
3405                ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3406                                              xis->inode_bh,
3407                                              OCFS2_JOURNAL_ACCESS_WRITE);
3408                if (ret) {
3409                        mlog_errno(ret);
3410                        goto out;
3411                }
3412
3413                inode->i_ctime = CURRENT_TIME;
3414                di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3415                di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3416                ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3417        }
3418out:
3419        return ret;
3420}
3421
3422/*
3423 * This function only called duing creating inode
3424 * for init security/acl xattrs of the new inode.
3425 * All transanction credits have been reserved in mknod.
3426 */
3427int ocfs2_xattr_set_handle(handle_t *handle,
3428                           struct inode *inode,
3429                           struct buffer_head *di_bh,
3430                           int name_index,
3431                           const char *name,
3432                           const void *value,
3433                           size_t value_len,
3434                           int flags,
3435                           struct ocfs2_alloc_context *meta_ac,
3436                           struct ocfs2_alloc_context *data_ac)
3437{
3438        struct ocfs2_dinode *di;
3439        int ret;
3440
3441        struct ocfs2_xattr_info xi = {
3442                .xi_name_index = name_index,
3443                .xi_name = name,
3444                .xi_name_len = strlen(name),
3445                .xi_value = value,
3446                .xi_value_len = value_len,
3447        };
3448
3449        struct ocfs2_xattr_search xis = {
3450                .not_found = -ENODATA,
3451        };
3452
3453        struct ocfs2_xattr_search xbs = {
3454                .not_found = -ENODATA,
3455        };
3456
3457        struct ocfs2_xattr_set_ctxt ctxt = {
3458                .handle = handle,
3459                .meta_ac = meta_ac,
3460                .data_ac = data_ac,
3461        };
3462
3463        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3464                return -EOPNOTSUPP;
3465
3466        /*
3467         * In extreme situation, may need xattr bucket when
3468         * block size is too small. And we have already reserved
3469         * the credits for bucket in mknod.
3470         */
3471        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3472                xbs.bucket = ocfs2_xattr_bucket_new(inode);
3473                if (!xbs.bucket) {
3474                        mlog_errno(-ENOMEM);
3475                        return -ENOMEM;
3476                }
3477        }
3478
3479        xis.inode_bh = xbs.inode_bh = di_bh;
3480        di = (struct ocfs2_dinode *)di_bh->b_data;
3481
3482        down_write(&OCFS2_I(inode)->ip_xattr_sem);
3483
3484        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3485        if (ret)
3486                goto cleanup;
3487        if (xis.not_found) {
3488                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3489                if (ret)
3490                        goto cleanup;
3491        }
3492
3493        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3494
3495cleanup:
3496        up_write(&OCFS2_I(inode)->ip_xattr_sem);
3497        brelse(xbs.xattr_bh);
3498        ocfs2_xattr_bucket_free(xbs.bucket);
3499
3500        return ret;
3501}
3502
3503/*
3504 * ocfs2_xattr_set()
3505 *
3506 * Set, replace or remove an extended attribute for this inode.
3507 * value is NULL to remove an existing extended attribute, else either
3508 * create or replace an extended attribute.
3509 */
3510int ocfs2_xattr_set(struct inode *inode,
3511                    int name_index,
3512                    const char *name,
3513                    const void *value,
3514                    size_t value_len,
3515                    int flags)
3516{
3517        struct buffer_head *di_bh = NULL;
3518        struct ocfs2_dinode *di;
3519        int ret, credits, ref_meta = 0, ref_credits = 0;
3520        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3521        struct inode *tl_inode = osb->osb_tl_inode;
3522        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
3523        struct ocfs2_refcount_tree *ref_tree = NULL;
3524
3525        struct ocfs2_xattr_info xi = {
3526                .xi_name_index = name_index,
3527                .xi_name = name,
3528                .xi_name_len = strlen(name),
3529                .xi_value = value,
3530                .xi_value_len = value_len,
3531        };
3532
3533        struct ocfs2_xattr_search xis = {
3534                .not_found = -ENODATA,
3535        };
3536
3537        struct ocfs2_xattr_search xbs = {
3538                .not_found = -ENODATA,
3539        };
3540
3541        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3542                return -EOPNOTSUPP;
3543
3544        /*
3545         * Only xbs will be used on indexed trees.  xis doesn't need a
3546         * bucket.
3547         */
3548        xbs.bucket = ocfs2_xattr_bucket_new(inode);
3549        if (!xbs.bucket) {
3550                mlog_errno(-ENOMEM);
3551                return -ENOMEM;
3552        }
3553
3554        ret = ocfs2_inode_lock(inode, &di_bh, 1);
3555        if (ret < 0) {
3556                mlog_errno(ret);
3557                goto cleanup_nolock;
3558        }
3559        xis.inode_bh = xbs.inode_bh = di_bh;
3560        di = (struct ocfs2_dinode *)di_bh->b_data;
3561
3562        down_write(&OCFS2_I(inode)->ip_xattr_sem);
3563        /*
3564         * Scan inode and external block to find the same name
3565         * extended attribute and collect search information.
3566         */
3567        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3568        if (ret)
3569                goto cleanup;
3570        if (xis.not_found) {
3571                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3572                if (ret)
3573                        goto cleanup;
3574        }
3575
3576        if (xis.not_found && xbs.not_found) {
3577                ret = -ENODATA;
3578                if (flags & XATTR_REPLACE)
3579                        goto cleanup;
3580                ret = 0;
3581                if (!value)
3582                        goto cleanup;
3583        } else {
3584                ret = -EEXIST;
3585                if (flags & XATTR_CREATE)
3586                        goto cleanup;
3587        }
3588
3589        /* Check whether the value is refcounted and do some preparation. */
3590        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3591            (!xis.not_found || !xbs.not_found)) {
3592                ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3593                                                   &xis, &xbs, &ref_tree,
3594                                                   &ref_meta, &ref_credits);
3595                if (ret) {
3596                        mlog_errno(ret);
3597                        goto cleanup;
3598                }
3599        }
3600
3601        mutex_lock(&tl_inode->i_mutex);
3602
3603        if (ocfs2_truncate_log_needs_flush(osb)) {
3604                ret = __ocfs2_flush_truncate_log(osb);
3605                if (ret < 0) {
3606                        mutex_unlock(&tl_inode->i_mutex);
3607                        mlog_errno(ret);
3608                        goto cleanup;
3609                }
3610        }
3611        mutex_unlock(&tl_inode->i_mutex);
3612
3613        ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3614                                        &xbs, &ctxt, ref_meta, &credits);
3615        if (ret) {
3616                mlog_errno(ret);
3617                goto cleanup;
3618        }
3619
3620        /* we need to update inode's ctime field, so add credit for it. */
3621        credits += OCFS2_INODE_UPDATE_CREDITS;
3622        ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3623        if (IS_ERR(ctxt.handle)) {
3624                ret = PTR_ERR(ctxt.handle);
3625                mlog_errno(ret);
3626                goto out_free_ac;
3627        }
3628
3629        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3630        ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
3631
3632        ocfs2_commit_trans(osb, ctxt.handle);
3633
3634out_free_ac:
3635        if (ctxt.data_ac)
3636                ocfs2_free_alloc_context(ctxt.data_ac);
3637        if (ctxt.meta_ac)
3638                ocfs2_free_alloc_context(ctxt.meta_ac);
3639        if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3640                ocfs2_schedule_truncate_log_flush(osb, 1);
3641        ocfs2_run_deallocs(osb, &ctxt.dealloc);
3642
3643cleanup:
3644        if (ref_tree)
3645                ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3646        up_write(&OCFS2_I(inode)->ip_xattr_sem);
3647        if (!value && !ret) {
3648                ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3649                if (ret)
3650                        mlog_errno(ret);
3651        }
3652        ocfs2_inode_unlock(inode, 1);
3653cleanup_nolock:
3654        brelse(di_bh);
3655        brelse(xbs.xattr_bh);
3656        ocfs2_xattr_bucket_free(xbs.bucket);
3657
3658        return ret;
3659}
3660
3661/*
3662 * Find the xattr extent rec which may contains name_hash.
3663 * e_cpos will be the first name hash of the xattr rec.
3664 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3665 */
3666static int ocfs2_xattr_get_rec(struct inode *inode,
3667                               u32 name_hash,
3668                               u64 *p_blkno,
3669                               u32 *e_cpos,
3670                               u32 *num_clusters,
3671                               struct ocfs2_extent_list *el)
3672{
3673        int ret = 0, i;
3674        struct buffer_head *eb_bh = NULL;
3675        struct ocfs2_extent_block *eb;
3676        struct ocfs2_extent_rec *rec = NULL;
3677        u64 e_blkno = 0;
3678
3679        if (el->l_tree_depth) {
3680                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3681                                      &eb_bh);
3682                if (ret) {
3683                        mlog_errno(ret);
3684                        goto out;
3685                }
3686
3687                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3688                el = &eb->h_list;
3689
3690                if (el->l_tree_depth) {
3691                        ret = ocfs2_error(inode->i_sb,
3692                                          "Inode %lu has non zero tree depth in xattr tree block %llu\n",
3693                                          inode->i_ino,
3694                                          (unsigned long long)eb_bh->b_blocknr);
3695                        goto out;
3696                }
3697        }
3698
3699        for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3700                rec = &el->l_recs[i];
3701
3702                if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3703                        e_blkno = le64_to_cpu(rec->e_blkno);
3704                        break;
3705                }
3706        }
3707
3708        if (!e_blkno) {
3709                ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
3710                                  inode->i_ino,
3711                                  le32_to_cpu(rec->e_cpos),
3712                                  ocfs2_rec_clusters(el, rec));
3713                goto out;
3714        }
3715
3716        *p_blkno = le64_to_cpu(rec->e_blkno);
3717        *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3718        if (e_cpos)
3719                *e_cpos = le32_to_cpu(rec->e_cpos);
3720out:
3721        brelse(eb_bh);
3722        return ret;
3723}
3724
/*
 * Signature of a per-bucket callback: it is handed the inode, one
 * xattr bucket and an opaque caller-supplied argument, and returns an
 * int (presumably 0 or a negative error code like the rest of this
 * file -- confirm against the callers).
 */
typedef int (xattr_bucket_func)(struct inode *inode,
                                struct ocfs2_xattr_bucket *bucket, void *para);
3728
3729static int ocfs2_find_xe_in_bucket(struct inode *inode,
3730                                   struct ocfs2_xattr_bucket *bucket,
3731                                   int name_index,
3732                                   const char *name,
3733                                   u32 name_hash,
3734                                   u16 *xe_index,
3735                                   int *found)
3736{
3737        int i, ret = 0, cmp = 1, block_off, new_offset;
3738        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3739        size_t name_len = strlen(name);
3740        struct ocfs2_xattr_entry *xe = NULL;
3741        char *xe_name;
3742
3743        /*
3744         * We don't use binary search in the bucket because there
3745         * may be multiple entries with the same name hash.
3746         */
3747        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3748                xe = &xh->xh_entries[i];
3749
3750                if (name_hash > le32_to_cpu(xe->xe_name_hash))
3751                        continue;
3752                else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3753                        break;
3754
3755                cmp = name_index - ocfs2_xattr_get_type(xe);
3756                if (!cmp)
3757                        cmp = name_len - xe->xe_name_len;
3758                if (cmp)
3759                        continue;
3760
3761                ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3762                                                        xh,
3763                                                        i,
3764                                                        &block_off,
3765                                                        &new_offset);
3766                if (ret) {
3767                        mlog_errno(ret);
3768                        break;
3769                }
3770
3771
3772                xe_name = bucket_block(bucket, block_off) + new_offset;
3773                if (!memcmp(name, xe_name, name_len)) {
3774                        *xe_index = i;
3775                        *found = 1;
3776                        ret = 0;
3777                        break;
3778                }
3779        }
3780
3781        return ret;
3782}
3783
3784/*
3785 * Find the specified xattr entry in a series of buckets.
3786 * This series start from p_blkno and last for num_clusters.
3787 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3788 * the num of the valid buckets.
3789 *
3790 * Return the buffer_head this xattr should reside in. And if the xattr's
3791 * hash is in the gap of 2 buckets, return the lower bucket.
3792 */
3793static int ocfs2_xattr_bucket_find(struct inode *inode,
3794                                   int name_index,
3795                                   const char *name,
3796                                   u32 name_hash,
3797                                   u64 p_blkno,
3798                                   u32 first_hash,
3799                                   u32 num_clusters,
3800                                   struct ocfs2_xattr_search *xs)
3801{
3802        int ret, found = 0;
3803        struct ocfs2_xattr_header *xh = NULL;
3804        struct ocfs2_xattr_entry *xe = NULL;
3805        u16 index = 0;
3806        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3807        int low_bucket = 0, bucket, high_bucket;
3808        struct ocfs2_xattr_bucket *search;
3809        u32 last_hash;
3810        u64 blkno, lower_blkno = 0;
3811
3812        search = ocfs2_xattr_bucket_new(inode);
3813        if (!search) {
3814                ret = -ENOMEM;
3815                mlog_errno(ret);
3816                goto out;
3817        }
3818
3819        ret = ocfs2_read_xattr_bucket(search, p_blkno);
3820        if (ret) {
3821                mlog_errno(ret);
3822                goto out;
3823        }
3824
3825        xh = bucket_xh(search);
3826        high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3827        while (low_bucket <= high_bucket) {
3828                ocfs2_xattr_bucket_relse(search);
3829
3830                bucket = (low_bucket + high_bucket) / 2;
3831                blkno = p_blkno + bucket * blk_per_bucket;
3832                ret = ocfs2_read_xattr_bucket(search, blkno);
3833                if (ret) {
3834                        mlog_errno(ret);
3835                        goto out;
3836                }
3837
3838                xh = bucket_xh(search);
3839                xe = &xh->xh_entries[0];
3840                if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3841                        high_bucket = bucket - 1;
3842                        continue;
3843                }
3844
3845                /*
3846                 * Check whether the hash of the last entry in our
3847                 * bucket is larger than the search one. for an empty
3848                 * bucket, the last one is also the first one.
3849                 */
3850                if (xh->xh_count)
3851                        xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3852
3853                last_hash = le32_to_cpu(xe->xe_name_hash);
3854
3855                /* record lower_blkno which may be the insert place. */
3856                lower_blkno = blkno;
3857
3858                if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3859                        low_bucket = bucket + 1;
3860                        continue;
3861                }
3862
3863                /* the searched xattr should reside in this bucket if exists. */
3864                ret = ocfs2_find_xe_in_bucket(inode, search,
3865                                              name_index, name, name_hash,
3866                                              &index, &found);
3867                if (ret) {
3868                        mlog_errno(ret);
3869                        goto out;
3870                }
3871                break;
3872        }
3873
3874        /*
3875         * Record the bucket we have found.
3876         * When the xattr's hash value is in the gap of 2 buckets, we will
3877         * always set it to the previous bucket.
3878         */
3879        if (!lower_blkno)
3880                lower_blkno = p_blkno;
3881
3882        /* This should be in cache - we just read it during the search */
3883        ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3884        if (ret) {
3885                mlog_errno(ret);
3886                goto out;
3887        }
3888
3889        xs->header = bucket_xh(xs->bucket);
3890        xs->base = bucket_block(xs->bucket, 0);
3891        xs->end = xs->base + inode->i_sb->s_blocksize;
3892
3893        if (found) {
3894                xs->here = &xs->header->xh_entries[index];
3895                trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3896                        name, name_index, name_hash,
3897                        (unsigned long long)bucket_blkno(xs->bucket),
3898                        index);
3899        } else
3900                ret = -ENODATA;
3901
3902out:
3903        ocfs2_xattr_bucket_free(search);
3904        return ret;
3905}
3906
3907static int ocfs2_xattr_index_block_find(struct inode *inode,
3908                                        struct buffer_head *root_bh,
3909                                        int name_index,
3910                                        const char *name,
3911                                        struct ocfs2_xattr_search *xs)
3912{
3913        int ret;
3914        struct ocfs2_xattr_block *xb =
3915                        (struct ocfs2_xattr_block *)root_bh->b_data;
3916        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3917        struct ocfs2_extent_list *el = &xb_root->xt_list;
3918        u64 p_blkno = 0;
3919        u32 first_hash, num_clusters = 0;
3920        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3921
3922        if (le16_to_cpu(el->l_next_free_rec) == 0)
3923                return -ENODATA;
3924
3925        trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3926                                        name, name_index, name_hash,
3927                                        (unsigned long long)root_bh->b_blocknr,
3928                                        -1);
3929
3930        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3931                                  &num_clusters, el);
3932        if (ret) {
3933                mlog_errno(ret);
3934                goto out;
3935        }
3936
3937        BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3938
3939        trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3940                                        name, name_index, first_hash,
3941                                        (unsigned long long)p_blkno,
3942                                        num_clusters);
3943
3944        ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3945                                      p_blkno, first_hash, num_clusters, xs);
3946
3947out:
3948        return ret;
3949}
3950
3951static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3952                                       u64 blkno,
3953                                       u32 clusters,
3954                                       xattr_bucket_func *func,
3955                                       void *para)
3956{
3957        int i, ret = 0;
3958        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3959        u32 num_buckets = clusters * bpc;
3960        struct ocfs2_xattr_bucket *bucket;
3961
3962        bucket = ocfs2_xattr_bucket_new(inode);
3963        if (!bucket) {
3964                mlog_errno(-ENOMEM);
3965                return -ENOMEM;
3966        }
3967
3968        trace_ocfs2_iterate_xattr_buckets(
3969                (unsigned long long)OCFS2_I(inode)->ip_blkno,
3970                (unsigned long long)blkno, clusters);
3971
3972        for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3973                ret = ocfs2_read_xattr_bucket(bucket, blkno);
3974                if (ret) {
3975                        mlog_errno(ret);
3976                        break;
3977                }
3978
3979                /*
3980                 * The real bucket num in this series of blocks is stored
3981                 * in the 1st bucket.
3982                 */
3983                if (i == 0)
3984                        num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3985
3986                trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
3987                     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3988                if (func) {
3989                        ret = func(inode, bucket, para);
3990                        if (ret && ret != -ERANGE)
3991                                mlog_errno(ret);
3992                        /* Fall through to bucket_relse() */
3993                }
3994
3995                ocfs2_xattr_bucket_relse(bucket);
3996                if (ret)
3997                        break;
3998        }
3999
4000        ocfs2_xattr_bucket_free(bucket);
4001        return ret;
4002}
4003
/*
 * Private argument threaded through the bucket iteration when the names
 * stored in an indexed xattr tree are listed.
 */
struct ocfs2_xattr_tree_list {
	/* Destination buffer handed to ocfs2_xattr_list_entry(). */
	char *buffer;
	/* Size passed to ocfs2_xattr_list_entry() together with buffer. */
	size_t buffer_size;
	/*
	 * Running total updated by ocfs2_xattr_list_entry(); it becomes the
	 * return value of ocfs2_xattr_tree_list_index_block() on success.
	 */
	size_t result;
};
4009
4010static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
4011                                             struct ocfs2_xattr_header *xh,
4012                                             int index,
4013                                             int *block_off,
4014                                             int *new_offset)
4015{
4016        u16 name_offset;
4017
4018        if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4019                return -EINVAL;
4020
4021        name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4022
4023        *block_off = name_offset >> sb->s_blocksize_bits;
4024        *new_offset = name_offset % sb->s_blocksize;
4025
4026        return 0;
4027}
4028
4029static int ocfs2_list_xattr_bucket(struct inode *inode,
4030                                   struct ocfs2_xattr_bucket *bucket,
4031                                   void *para)
4032{
4033        int ret = 0, type;
4034        struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4035        int i, block_off, new_offset;
4036        const char *prefix, *name;
4037
4038        for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4039                struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4040                type = ocfs2_xattr_get_type(entry);
4041                prefix = ocfs2_xattr_prefix(type);
4042
4043                if (prefix) {
4044                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4045                                                                bucket_xh(bucket),
4046                                                                i,
4047                                                                &block_off,
4048                                                                &new_offset);
4049                        if (ret)
4050                                break;
4051
4052                        name = (const char *)bucket_block(bucket, block_off) +
4053                                new_offset;
4054                        ret = ocfs2_xattr_list_entry(xl->buffer,
4055                                                     xl->buffer_size,
4056                                                     &xl->result,
4057                                                     prefix, name,
4058                                                     entry->xe_name_len);
4059                        if (ret)
4060                                break;
4061                }
4062        }
4063
4064        return ret;
4065}
4066
4067static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4068                                           struct buffer_head *blk_bh,
4069                                           xattr_tree_rec_func *rec_func,
4070                                           void *para)
4071{
4072        struct ocfs2_xattr_block *xb =
4073                        (struct ocfs2_xattr_block *)blk_bh->b_data;
4074        struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4075        int ret = 0;
4076        u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4077        u64 p_blkno = 0;
4078
4079        if (!el->l_next_free_rec || !rec_func)
4080                return 0;
4081
4082        while (name_hash > 0) {
4083                ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4084                                          &e_cpos, &num_clusters, el);
4085                if (ret) {
4086                        mlog_errno(ret);
4087                        break;
4088                }
4089
4090                ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4091                               num_clusters, para);
4092                if (ret) {
4093                        if (ret != -ERANGE)
4094                                mlog_errno(ret);
4095                        break;
4096                }
4097
4098                if (e_cpos == 0)
4099                        break;
4100
4101                name_hash = e_cpos - 1;
4102        }
4103
4104        return ret;
4105
4106}
4107
4108static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4109                                     struct buffer_head *root_bh,
4110                                     u64 blkno, u32 cpos, u32 len, void *para)
4111{
4112        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4113                                           ocfs2_list_xattr_bucket, para);
4114}
4115
4116static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4117                                             struct buffer_head *blk_bh,
4118                                             char *buffer,
4119                                             size_t buffer_size)
4120{
4121        int ret;
4122        struct ocfs2_xattr_tree_list xl = {
4123                .buffer = buffer,
4124                .buffer_size = buffer_size,
4125                .result = 0,
4126        };
4127
4128        ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4129                                              ocfs2_list_xattr_tree_rec, &xl);
4130        if (ret) {
4131                mlog_errno(ret);
4132                goto out;
4133        }
4134
4135        ret = xl.result;
4136out:
4137        return ret;
4138}
4139
4140static int cmp_xe(const void *a, const void *b)
4141{
4142        const struct ocfs2_xattr_entry *l = a, *r = b;
4143        u32 l_hash = le32_to_cpu(l->xe_name_hash);
4144        u32 r_hash = le32_to_cpu(r->xe_name_hash);
4145
4146        if (l_hash > r_hash)
4147                return 1;
4148        if (l_hash < r_hash)
4149                return -1;
4150        return 0;
4151}
4152
4153static void swap_xe(void *a, void *b, int size)
4154{
4155        struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4156
4157        tmp = *l;
4158        memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4159        memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4160}
4161
4162/*
4163 * When the ocfs2_xattr_block is filled up, new bucket will be created
4164 * and all the xattr entries will be moved to the new bucket.
4165 * The header goes at the start of the bucket, and the names+values are
4166 * filled from the end.  This is why *target starts as the last buffer.
4167 * Note: we need to sort the entries since they are not saved in order
4168 * in the ocfs2_xattr_block.
4169 */
4170static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4171                                           struct buffer_head *xb_bh,
4172                                           struct ocfs2_xattr_bucket *bucket)
4173{
4174        int i, blocksize = inode->i_sb->s_blocksize;
4175        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4176        u16 offset, size, off_change;
4177        struct ocfs2_xattr_entry *xe;
4178        struct ocfs2_xattr_block *xb =
4179                                (struct ocfs2_xattr_block *)xb_bh->b_data;
4180        struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4181        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4182        u16 count = le16_to_cpu(xb_xh->xh_count);
4183        char *src = xb_bh->b_data;
4184        char *target = bucket_block(bucket, blks - 1);
4185
4186        trace_ocfs2_cp_xattr_block_to_bucket_begin(
4187                                (unsigned long long)xb_bh->b_blocknr,
4188                                (unsigned long long)bucket_blkno(bucket));
4189
4190        for (i = 0; i < blks; i++)
4191                memset(bucket_block(bucket, i), 0, blocksize);
4192
4193        /*
4194         * Since the xe_name_offset is based on ocfs2_xattr_header,
4195         * there is a offset change corresponding to the change of
4196         * ocfs2_xattr_header's position.
4197         */
4198        off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4199        xe = &xb_xh->xh_entries[count - 1];
4200        offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4201        size = blocksize - offset;
4202
4203        /* copy all the names and values. */
4204        memcpy(target + offset, src + offset, size);
4205
4206        /* Init new header now. */
4207        xh->xh_count = xb_xh->xh_count;
4208        xh->xh_num_buckets = cpu_to_le16(1);
4209        xh->xh_name_value_len = cpu_to_le16(size);
4210        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4211
4212        /* copy all the entries. */
4213        target = bucket_block(bucket, 0);
4214        offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4215        size = count * sizeof(struct ocfs2_xattr_entry);
4216        memcpy(target + offset, (char *)xb_xh + offset, size);
4217
4218        /* Change the xe offset for all the xe because of the move. */
4219        off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4220                 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4221        for (i = 0; i < count; i++)
4222                le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4223
4224        trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4225
4226        sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4227             cmp_xe, swap_xe);
4228}
4229
4230/*
4231 * After we move xattr from block to index btree, we have to
4232 * update ocfs2_xattr_search to the new xe and base.
4233 *
4234 * When the entry is in xattr block, xattr_bh indicates the storage place.
4235 * While if the entry is in index b-tree, "bucket" indicates the
4236 * real place of the xattr.
4237 */
4238static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4239                                            struct ocfs2_xattr_search *xs,
4240                                            struct buffer_head *old_bh)
4241{
4242        char *buf = old_bh->b_data;
4243        struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4244        struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4245        int i;
4246
4247        xs->header = bucket_xh(xs->bucket);
4248        xs->base = bucket_block(xs->bucket, 0);
4249        xs->end = xs->base + inode->i_sb->s_blocksize;
4250
4251        if (xs->not_found)
4252                return;
4253
4254        i = xs->here - old_xh->xh_entries;
4255        xs->here = &xs->header->xh_entries[i];
4256}
4257
4258static int ocfs2_xattr_create_index_block(struct inode *inode,
4259                                          struct ocfs2_xattr_search *xs,
4260                                          struct ocfs2_xattr_set_ctxt *ctxt)
4261{
4262        int ret;
4263        u32 bit_off, len;
4264        u64 blkno;
4265        handle_t *handle = ctxt->handle;
4266        struct ocfs2_inode_info *oi = OCFS2_I(inode);
4267        struct buffer_head *xb_bh = xs->xattr_bh;
4268        struct ocfs2_xattr_block *xb =
4269                        (struct ocfs2_xattr_block *)xb_bh->b_data;
4270        struct ocfs2_xattr_tree_root *xr;
4271        u16 xb_flags = le16_to_cpu(xb->xb_flags);
4272
4273        trace_ocfs2_xattr_create_index_block_begin(
4274                                (unsigned long long)xb_bh->b_blocknr);
4275
4276        BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4277        BUG_ON(!xs->bucket);
4278
4279        /*
4280         * XXX:
4281         * We can use this lock for now, and maybe move to a dedicated mutex
4282         * if performance becomes a problem later.
4283         */
4284        down_write(&oi->ip_alloc_sem);
4285
4286        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4287                                      OCFS2_JOURNAL_ACCESS_WRITE);
4288        if (ret) {
4289                mlog_errno(ret);
4290                goto out;
4291        }
4292
4293        ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4294                                     1, 1, &bit_off, &len);
4295        if (ret) {
4296                mlog_errno(ret);
4297                goto out;
4298        }
4299
4300        /*
4301         * The bucket may spread in many blocks, and
4302         * we will only touch the 1st block and the last block
4303         * in the whole bucket(one for entry and one for data).
4304         */
4305        blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4306
4307        trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4308
4309        ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
4310        if (ret) {
4311                mlog_errno(ret);
4312                goto out;
4313        }
4314
4315        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4316                                                OCFS2_JOURNAL_ACCESS_CREATE);
4317        if (ret) {
4318                mlog_errno(ret);
4319                goto out;
4320        }
4321
4322        ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4323        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4324
4325        ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4326
4327        /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4328        memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4329               offsetof(struct ocfs2_xattr_block, xb_attrs));
4330
4331        xr = &xb->xb_attrs.xb_root;
4332        xr->xt_clusters = cpu_to_le32(1);
4333        xr->xt_last_eb_blk = 0;
4334        xr->xt_list.l_tree_depth = 0;
4335        xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4336        xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4337
4338        xr->xt_list.l_recs[0].e_cpos = 0;
4339        xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4340        xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4341
4342        xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4343
4344        ocfs2_journal_dirty(handle, xb_bh);
4345
4346out:
4347        up_write(&oi->ip_alloc_sem);
4348
4349        return ret;
4350}
4351
4352static int cmp_xe_offset(const void *a, const void *b)
4353{
4354        const struct ocfs2_xattr_entry *l = a, *r = b;
4355        u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4356        u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4357
4358        if (l_name_offset < r_name_offset)
4359                return 1;
4360        if (l_name_offset > r_name_offset)
4361                return -1;
4362        return 0;
4363}
4364
4365/*
4366 * defrag a xattr bucket if we find that the bucket has some
4367 * holes beteen name/value pairs.
4368 * We will move all the name/value pairs to the end of the bucket
4369 * so that we can spare some space for insertion.
4370 */
4371static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4372                                     handle_t *handle,
4373                                     struct ocfs2_xattr_bucket *bucket)
4374{
4375        int ret, i;
4376        size_t end, offset, len;
4377        struct ocfs2_xattr_header *xh;
4378        char *entries, *buf, *bucket_buf = NULL;
4379        u64 blkno = bucket_blkno(bucket);
4380        u16 xh_free_start;
4381        size_t blocksize = inode->i_sb->s_blocksize;
4382        struct ocfs2_xattr_entry *xe;
4383
4384        /*
4385         * In order to make the operation more efficient and generic,
4386         * we copy all the blocks into a contiguous memory and do the
4387         * defragment there, so if anything is error, we will not touch
4388         * the real block.
4389         */
4390        bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4391        if (!bucket_buf) {
4392                ret = -EIO;
4393                goto out;
4394        }
4395
4396        buf = bucket_buf;
4397        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4398                memcpy(buf, bucket_block(bucket, i), blocksize);
4399
4400        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4401                                                OCFS2_JOURNAL_ACCESS_WRITE);
4402        if (ret < 0) {
4403                mlog_errno(ret);
4404                goto out;
4405        }
4406
4407        xh = (struct ocfs2_xattr_header *)bucket_buf;
4408        entries = (char *)xh->xh_entries;
4409        xh_free_start = le16_to_cpu(xh->xh_free_start);
4410
4411        trace_ocfs2_defrag_xattr_bucket(
4412             (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4413             xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4414
4415        /*
4416         * sort all the entries by their offset.
4417         * the largest will be the first, so that we can
4418         * move them to the end one by one.
4419         */
4420        sort(entries, le16_to_cpu(xh->xh_count),
4421             sizeof(struct ocfs2_xattr_entry),
4422             cmp_xe_offset, swap_xe);
4423
4424        /* Move all name/values to the end of the bucket. */
4425        xe = xh->xh_entries;
4426        end = OCFS2_XATTR_BUCKET_SIZE;
4427        for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4428                offset = le16_to_cpu(xe->xe_name_offset);
4429                len = namevalue_size_xe(xe);
4430
4431                /*
4432                 * We must make sure that the name/value pair
4433                 * exist in the same block. So adjust end to
4434                 * the previous block end if needed.
4435                 */
4436                if (((end - len) / blocksize !=
4437                        (end - 1) / blocksize))
4438                        end = end - end % blocksize;
4439
4440                if (end > offset + len) {
4441                        memmove(bucket_buf + end - len,
4442                                bucket_buf + offset, len);
4443                        xe->xe_name_offset = cpu_to_le16(end - len);
4444                }
4445
4446                mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4447                                "bucket %llu\n", (unsigned long long)blkno);
4448
4449                end -= len;
4450        }
4451
4452        mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4453                        "bucket %llu\n", (unsigned long long)blkno);
4454
4455        if (xh_free_start == end)
4456                goto out;
4457
4458        memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4459        xh->xh_free_start = cpu_to_le16(end);
4460
4461        /* sort the entries by their name_hash. */
4462        sort(entries, le16_to_cpu(xh->xh_count),
4463             sizeof(struct ocfs2_xattr_entry),
4464             cmp_xe, swap_xe);
4465
4466        buf = bucket_buf;
4467        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4468                memcpy(bucket_block(bucket, i), buf, blocksize);
4469        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4470
4471out:
4472        kfree(bucket_buf);
4473        return ret;
4474}
4475
4476/*
4477 * prev_blkno points to the start of an existing extent.  new_blkno
4478 * points to a newly allocated extent.  Because we know each of our
4479 * clusters contains more than bucket, we can easily split one cluster
4480 * at a bucket boundary.  So we take the last cluster of the existing
4481 * extent and split it down the middle.  We move the last half of the
4482 * buckets in the last cluster of the existing extent over to the new
4483 * extent.
4484 *
4485 * first_bh is the buffer at prev_blkno so we can update the existing
4486 * extent's bucket count.  header_bh is the bucket were we were hoping
4487 * to insert our xattr.  If the bucket move places the target in the new
4488 * extent, we'll update first_bh and header_bh after modifying the old
4489 * extent.
4490 *
4491 * first_hash will be set as the 1st xe's name_hash in the new extent.
4492 */
4493static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4494                                               handle_t *handle,
4495                                               struct ocfs2_xattr_bucket *first,
4496                                               struct ocfs2_xattr_bucket *target,
4497                                               u64 new_blkno,
4498                                               u32 num_clusters,
4499                                               u32 *first_hash)
4500{
4501        int ret;
4502        struct super_block *sb = inode->i_sb;
4503        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4504        int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4505        int to_move = num_buckets / 2;
4506        u64 src_blkno;
4507        u64 last_cluster_blkno = bucket_blkno(first) +
4508                ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4509
4510        BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4511        BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4512
4513        trace_ocfs2_mv_xattr_bucket_cross_cluster(
4514                                (unsigned long long)last_cluster_blkno,
4515                                (unsigned long long)new_blkno);
4516
4517        ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4518                                     last_cluster_blkno, new_blkno,
4519                                     to_move, first_hash);
4520        if (ret) {
4521                mlog_errno(ret);
4522                goto out;
4523        }
4524
4525        /* This is the first bucket that got moved */
4526        src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4527
4528        /*
4529         * If the target bucket was part of the moved buckets, we need to
4530         * update first and target.
4531         */
4532        if (bucket_blkno(target) >= src_blkno) {
4533                /* Find the block for the new target bucket */
4534                src_blkno = new_blkno +
4535                        (bucket_blkno(target) - src_blkno);
4536
4537                ocfs2_xattr_bucket_relse(first);
4538                ocfs2_xattr_bucket_relse(target);
4539
4540                /*
4541                 * These shouldn't fail - the buffers are in the
4542                 * journal from ocfs2_cp_xattr_bucket().
4543                 */
4544                ret = ocfs2_read_xattr_bucket(first, new_blkno);
4545                if (ret) {
4546                        mlog_errno(ret);
4547                        goto out;
4548                }
4549                ret = ocfs2_read_xattr_bucket(target, src_blkno);
4550                if (ret)
4551                        mlog_errno(ret);
4552
4553        }
4554
4555out:
4556        return ret;
4557}
4558
4559/*
4560 * Find the suitable pos when we divide a bucket into 2.
4561 * We have to make sure the xattrs with the same hash value exist
4562 * in the same bucket.
4563 *
4564 * If this ocfs2_xattr_header covers more than one hash value, find a
4565 * place where the hash value changes.  Try to find the most even split.
4566 * The most common case is that all entries have different hash values,
4567 * and the first check we make will find a place to split.
4568 */
4569static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4570{
4571        struct ocfs2_xattr_entry *entries = xh->xh_entries;
4572        int count = le16_to_cpu(xh->xh_count);
4573        int delta, middle = count / 2;
4574
4575        /*
4576         * We start at the middle.  Each step gets farther away in both
4577         * directions.  We therefore hit the change in hash value
4578         * nearest to the middle.  Note that this loop does not execute for
4579         * count < 2.
4580         */
4581        for (delta = 0; delta < middle; delta++) {
4582                /* Let's check delta earlier than middle */
4583                if (cmp_xe(&entries[middle - delta - 1],
4584                           &entries[middle - delta]))
4585                        return middle - delta;
4586
4587                /* For even counts, don't walk off the end */
4588                if ((middle + delta + 1) == count)
4589                        continue;
4590
4591                /* Now try delta past middle */
4592                if (cmp_xe(&entries[middle + delta],
4593                           &entries[middle + delta + 1]))
4594                        return middle + delta + 1;
4595        }
4596
4597        /* Every entry had the same hash */
4598        return count;
4599}
4600
4601/*
4602 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4603 * first_hash will record the 1st hash of the new bucket.
4604 *
4605 * Normally half of the xattrs will be moved.  But we have to make
4606 * sure that the xattrs with the same hash value are stored in the
4607 * same bucket. If all the xattrs in this bucket have the same hash
4608 * value, the new bucket will be initialized as an empty one and the
4609 * first_hash will be initialized as (hash_value+1).
4610 */
4611static int ocfs2_divide_xattr_bucket(struct inode *inode,
4612                                    handle_t *handle,
4613                                    u64 blk,
4614                                    u64 new_blk,
4615                                    u32 *first_hash,
4616                                    int new_bucket_head)
4617{
4618        int ret, i;
4619        int count, start, len, name_value_len = 0, name_offset = 0;
4620        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4621        struct ocfs2_xattr_header *xh;
4622        struct ocfs2_xattr_entry *xe;
4623        int blocksize = inode->i_sb->s_blocksize;
4624
4625        trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
4626                                              (unsigned long long)new_blk);
4627
4628        s_bucket = ocfs2_xattr_bucket_new(inode);
4629        t_bucket = ocfs2_xattr_bucket_new(inode);
4630        if (!s_bucket || !t_bucket) {
4631                ret = -ENOMEM;
4632                mlog_errno(ret);
4633                goto out;
4634        }
4635
4636        ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4637        if (ret) {
4638                mlog_errno(ret);
4639                goto out;
4640        }
4641
4642        ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4643                                                OCFS2_JOURNAL_ACCESS_WRITE);
4644        if (ret) {
4645                mlog_errno(ret);
4646                goto out;
4647        }
4648
4649        /*
4650         * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4651         * there's no need to read it.
4652         */
4653        ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
4654        if (ret) {
4655                mlog_errno(ret);
4656                goto out;
4657        }
4658
4659        /*
4660         * Hey, if we're overwriting t_bucket, what difference does
4661         * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4662         * same part of ocfs2_cp_xattr_bucket().
4663         */
4664        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4665                                                new_bucket_head ?
4666                                                OCFS2_JOURNAL_ACCESS_CREATE :
4667                                                OCFS2_JOURNAL_ACCESS_WRITE);
4668        if (ret) {
4669                mlog_errno(ret);
4670                goto out;
4671        }
4672
4673        xh = bucket_xh(s_bucket);
4674        count = le16_to_cpu(xh->xh_count);
4675        start = ocfs2_xattr_find_divide_pos(xh);
4676
4677        if (start == count) {
4678                xe = &xh->xh_entries[start-1];
4679
4680                /*
4681                 * initialized a new empty bucket here.
4682                 * The hash value is set as one larger than
4683                 * that of the last entry in the previous bucket.
4684                 */
4685                for (i = 0; i < t_bucket->bu_blocks; i++)
4686                        memset(bucket_block(t_bucket, i), 0, blocksize);
4687
4688                xh = bucket_xh(t_bucket);
4689                xh->xh_free_start = cpu_to_le16(blocksize);
4690                xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4691                le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4692
4693                goto set_num_buckets;
4694        }
4695
4696        /* copy the whole bucket to the new first. */
4697        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4698
4699        /* update the new bucket. */
4700        xh = bucket_xh(t_bucket);
4701
4702        /*
4703         * Calculate the total name/value len and xh_free_start for
4704         * the old bucket first.
4705         */
4706        name_offset = OCFS2_XATTR_BUCKET_SIZE;
4707        name_value_len = 0;
4708        for (i = 0; i < start; i++) {
4709                xe = &xh->xh_entries[i];
4710                name_value_len += namevalue_size_xe(xe);
4711                if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4712                        name_offset = le16_to_cpu(xe->xe_name_offset);
4713        }
4714
4715        /*
4716         * Now begin the modification to the new bucket.
4717         *
4718         * In the new bucket, We just move the xattr entry to the beginning
4719         * and don't touch the name/value. So there will be some holes in the
4720         * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4721         * called.
4722         */
4723        xe = &xh->xh_entries[start];
4724        len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4725        trace_ocfs2_divide_xattr_bucket_move(len,
4726                        (int)((char *)xe - (char *)xh),
4727                        (int)((char *)xh->xh_entries - (char *)xh));
4728        memmove((char *)xh->xh_entries, (char *)xe, len);
4729        xe = &xh->xh_entries[count - start];
4730        len = sizeof(struct ocfs2_xattr_entry) * start;
4731        memset((char *)xe, 0, len);
4732
4733        le16_add_cpu(&xh->xh_count, -start);
4734        le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4735
4736        /* Calculate xh_free_start for the new bucket. */
4737        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4738        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4739                xe = &xh->xh_entries[i];
4740                if (le16_to_cpu(xe->xe_name_offset) <
4741                    le16_to_cpu(xh->xh_free_start))
4742                        xh->xh_free_start = xe->xe_name_offset;
4743        }
4744
4745set_num_buckets:
4746        /* set xh->xh_num_buckets for the new xh. */
4747        if (new_bucket_head)
4748                xh->xh_num_buckets = cpu_to_le16(1);
4749        else
4750                xh->xh_num_buckets = 0;
4751
4752        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4753
4754        /* store the first_hash of the new bucket. */
4755        if (first_hash)
4756                *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4757
4758        /*
4759         * Now only update the 1st block of the old bucket.  If we
4760         * just added a new empty bucket, there is no need to modify
4761         * it.
4762         */
4763        if (start == count)
4764                goto out;
4765
4766        xh = bucket_xh(s_bucket);
4767        memset(&xh->xh_entries[start], 0,
4768               sizeof(struct ocfs2_xattr_entry) * (count - start));
4769        xh->xh_count = cpu_to_le16(start);
4770        xh->xh_free_start = cpu_to_le16(name_offset);
4771        xh->xh_name_value_len = cpu_to_le16(name_value_len);
4772
4773        ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4774
4775out:
4776        ocfs2_xattr_bucket_free(s_bucket);
4777        ocfs2_xattr_bucket_free(t_bucket);
4778
4779        return ret;
4780}
4781
4782/*
4783 * Copy xattr from one bucket to another bucket.
4784 *
4785 * The caller must make sure that the journal transaction
4786 * has enough space for journaling.
4787 */
4788static int ocfs2_cp_xattr_bucket(struct inode *inode,
4789                                 handle_t *handle,
4790                                 u64 s_blkno,
4791                                 u64 t_blkno,
4792                                 int t_is_new)
4793{
4794        int ret;
4795        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4796
4797        BUG_ON(s_blkno == t_blkno);
4798
4799        trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
4800                                    (unsigned long long)t_blkno,
4801                                    t_is_new);
4802
4803        s_bucket = ocfs2_xattr_bucket_new(inode);
4804        t_bucket = ocfs2_xattr_bucket_new(inode);
4805        if (!s_bucket || !t_bucket) {
4806                ret = -ENOMEM;
4807                mlog_errno(ret);
4808                goto out;
4809        }
4810
4811        ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4812        if (ret)
4813                goto out;
4814
4815        /*
4816         * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4817         * there's no need to read it.
4818         */
4819        ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
4820        if (ret)
4821                goto out;
4822
4823        /*
4824         * Hey, if we're overwriting t_bucket, what difference does
4825         * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4826         * cluster to fill, we came here from
4827         * ocfs2_mv_xattr_buckets(), and it is really new -
4828         * ACCESS_CREATE is required.  But we also might have moved data
4829         * out of t_bucket before extending back into it.
4830         * ocfs2_add_new_xattr_bucket() can do this - its call to
4831         * ocfs2_add_new_xattr_cluster() may have created a new extent
4832         * and copied out the end of the old extent.  Then it re-extends
4833         * the old extent back to create space for new xattrs.  That's
4834         * how we get here, and the bucket isn't really new.
4835         */
4836        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4837                                                t_is_new ?
4838                                                OCFS2_JOURNAL_ACCESS_CREATE :
4839                                                OCFS2_JOURNAL_ACCESS_WRITE);
4840        if (ret)
4841                goto out;
4842
4843        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4844        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4845
4846out:
4847        ocfs2_xattr_bucket_free(t_bucket);
4848        ocfs2_xattr_bucket_free(s_bucket);
4849
4850        return ret;
4851}
4852
4853/*
4854 * src_blk points to the start of an existing extent.  last_blk points to
4855 * last cluster in that extent.  to_blk points to a newly allocated
4856 * extent.  We copy the buckets from the cluster at last_blk to the new
4857 * extent.  If start_bucket is non-zero, we skip that many buckets before
4858 * we start copying.  The new extent's xh_num_buckets gets set to the
4859 * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4860 * by the same amount.
4861 */
4862static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4863                                  u64 src_blk, u64 last_blk, u64 to_blk,
4864                                  unsigned int start_bucket,
4865                                  u32 *first_hash)
4866{
4867        int i, ret, credits;
4868        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4869        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4870        int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4871        struct ocfs2_xattr_bucket *old_first, *new_first;
4872
4873        trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
4874                                     (unsigned long long)to_blk);
4875
4876        BUG_ON(start_bucket >= num_buckets);
4877        if (start_bucket) {
4878                num_buckets -= start_bucket;
4879                last_blk += (start_bucket * blks_per_bucket);
4880        }
4881
4882        /* The first bucket of the original extent */
4883        old_first = ocfs2_xattr_bucket_new(inode);
4884        /* The first bucket of the new extent */
4885        new_first = ocfs2_xattr_bucket_new(inode);
4886        if (!old_first || !new_first) {
4887                ret = -ENOMEM;
4888                mlog_errno(ret);
4889                goto out;
4890        }
4891
4892        ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4893        if (ret) {
4894                mlog_errno(ret);
4895                goto out;
4896        }
4897
4898        /*
4899         * We need to update the first bucket of the old extent and all
4900         * the buckets going to the new extent.
4901         */
4902        credits = ((num_buckets + 1) * blks_per_bucket);
4903        ret = ocfs2_extend_trans(handle, credits);
4904        if (ret) {
4905                mlog_errno(ret);
4906                goto out;
4907        }
4908
4909        ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4910                                                OCFS2_JOURNAL_ACCESS_WRITE);
4911        if (ret) {
4912                mlog_errno(ret);
4913                goto out;
4914        }
4915
4916        for (i = 0; i < num_buckets; i++) {
4917                ret = ocfs2_cp_xattr_bucket(inode, handle,
4918                                            last_blk + (i * blks_per_bucket),
4919                                            to_blk + (i * blks_per_bucket),
4920                                            1);
4921                if (ret) {
4922                        mlog_errno(ret);
4923                        goto out;
4924                }
4925        }
4926
4927        /*
4928         * Get the new bucket ready before we dirty anything
4929         * (This actually shouldn't fail, because we already dirtied
4930         * it once in ocfs2_cp_xattr_bucket()).
4931         */
4932        ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4933        if (ret) {
4934                mlog_errno(ret);
4935                goto out;
4936        }
4937        ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4938                                                OCFS2_JOURNAL_ACCESS_WRITE);
4939        if (ret) {
4940                mlog_errno(ret);
4941                goto out;
4942        }
4943
4944        /* Now update the headers */
4945        le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4946        ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4947
4948        bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4949        ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4950
4951        if (first_hash)
4952                *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4953
4954out:
4955        ocfs2_xattr_bucket_free(new_first);
4956        ocfs2_xattr_bucket_free(old_first);
4957        return ret;
4958}
4959
4960/*
4961 * Move some xattrs in this cluster to the new cluster.
4962 * This function should only be called when bucket size == cluster size.
4963 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4964 */
4965static int ocfs2_divide_xattr_cluster(struct inode *inode,
4966                                      handle_t *handle,
4967                                      u64 prev_blk,
4968                                      u64 new_blk,
4969                                      u32 *first_hash)
4970{
4971        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4972        int ret, credits = 2 * blk_per_bucket;
4973
4974        BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4975
4976        ret = ocfs2_extend_trans(handle, credits);
4977        if (ret) {
4978                mlog_errno(ret);
4979                return ret;
4980        }
4981
4982        /* Move half of the xattr in start_blk to the next bucket. */
4983        return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4984                                          new_blk, first_hash, 1);
4985}
4986
4987/*
4988 * Move some xattrs from the old cluster to the new one since they are not
4989 * contiguous in ocfs2 xattr tree.
4990 *
4991 * new_blk starts a new separate cluster, and we will move some xattrs from
4992 * prev_blk to it. v_start will be set as the first name hash value in this
4993 * new cluster so that it can be used as e_cpos during tree insertion and
4994 * don't collide with our original b-tree operations. first_bh and header_bh
4995 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4996 * to extend the insert bucket.
4997 *
4998 * The problem is how much xattr should we move to the new one and when should
4999 * we update first_bh and header_bh?
5000 * 1. If cluster size > bucket size, that means the previous cluster has more
5001 *    than 1 bucket, so just move half nums of bucket into the new cluster and
5002 *    update the first_bh and header_bh if the insert bucket has been moved
5003 *    to the new cluster.
5004 * 2. If cluster_size == bucket_size:
5005 *    a) If the previous extent rec has more than one cluster and the insert
5006 *       place isn't in the last cluster, copy the entire last cluster to the
5007 *       new one. This time, we don't need to upate the first_bh and header_bh
5008 *       since they will not be moved into the new cluster.
5009 *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
5010 *       the new one. And we set the extend flag to zero if the insert place is
5011 *       moved into the new allocated cluster since no extend is needed.
5012 */
5013static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5014                                            handle_t *handle,
5015                                            struct ocfs2_xattr_bucket *first,
5016                                            struct ocfs2_xattr_bucket *target,
5017                                            u64 new_blk,
5018                                            u32 prev_clusters,
5019                                            u32 *v_start,
5020                                            int *extend)
5021{
5022        int ret;
5023
5024        trace_ocfs2_adjust_xattr_cross_cluster(
5025                        (unsigned long long)bucket_blkno(first),
5026                        (unsigned long long)new_blk, prev_clusters);
5027
5028        if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5029                ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5030                                                          handle,
5031                                                          first, target,
5032                                                          new_blk,
5033                                                          prev_clusters,
5034                                                          v_start);
5035                if (ret)
5036                        mlog_errno(ret);
5037        } else {
5038                /* The start of the last cluster in the first extent */
5039                u64 last_blk = bucket_blkno(first) +
5040                        ((prev_clusters - 1) *
5041                         ocfs2_clusters_to_blocks(inode->i_sb, 1));
5042
5043                if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5044                        ret = ocfs2_mv_xattr_buckets(inode, handle,
5045                                                     bucket_blkno(first),
5046                                                     last_blk, new_blk, 0,
5047                                                     v_start);
5048                        if (ret)
5049                                mlog_errno(ret);
5050                } else {
5051                        ret = ocfs2_divide_xattr_cluster(inode, handle,
5052                                                         last_blk, new_blk,
5053                                                         v_start);
5054                        if (ret)
5055                                mlog_errno(ret);
5056
5057                        if ((bucket_blkno(target) == last_blk) && extend)
5058                                *extend = 0;
5059                }
5060        }
5061
5062        return ret;
5063}
5064
5065/*
5066 * Add a new cluster for xattr storage.
5067 *
5068 * If the new cluster is contiguous with the previous one, it will be
5069 * appended to the same extent record, and num_clusters will be updated.
5070 * If not, we will insert a new extent for it and move some xattrs in
5071 * the last cluster into the new allocated one.
5072 * We also need to limit the maximum size of a btree leaf, otherwise we'll
5073 * lose the benefits of hashing because we'll have to search large leaves.
5074 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5075 * if it's bigger).
5076 *
5077 * first_bh is the first block of the previous extent rec and header_bh
5078 * indicates the bucket we will insert the new xattrs. They will be updated
5079 * when the header_bh is moved into the new cluster.
5080 */
5081static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5082                                       struct buffer_head *root_bh,
5083                                       struct ocfs2_xattr_bucket *first,
5084                                       struct ocfs2_xattr_bucket *target,
5085                                       u32 *num_clusters,
5086                                       u32 prev_cpos,
5087                                       int *extend,
5088                                       struct ocfs2_xattr_set_ctxt *ctxt)
5089{
5090        int ret;
5091        u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5092        u32 prev_clusters = *num_clusters;
5093        u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5094        u64 block;
5095        handle_t *handle = ctxt->handle;
5096        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5097        struct ocfs2_extent_tree et;
5098
5099        trace_ocfs2_add_new_xattr_cluster_begin(
5100                (unsigned long long)OCFS2_I(inode)->ip_blkno,
5101                (unsigned long long)bucket_blkno(first),
5102                prev_cpos, prev_clusters);
5103
5104        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5105
5106        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5107                                      OCFS2_JOURNAL_ACCESS_WRITE);
5108        if (ret < 0) {
5109                mlog_errno(ret);
5110                goto leave;
5111        }
5112
5113        ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5114                                     clusters_to_add, &bit_off, &num_bits);
5115        if (ret < 0) {
5116                if (ret != -ENOSPC)
5117                        mlog_errno(ret);
5118                goto leave;
5119        }
5120
5121        BUG_ON(num_bits > clusters_to_add);
5122
5123        block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5124        trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);
5125
5126        if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5127            (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5128             OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5129                /*
5130                 * If this cluster is contiguous with the old one and
5131                 * adding this new cluster, we don't surpass the limit of
5132                 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5133                 * initialized and used like other buckets in the previous
5134                 * cluster.
5135                 * So add it as a contiguous one. The caller will handle
5136                 * its init process.
5137                 */
5138                v_start = prev_cpos + prev_clusters;
5139                *num_clusters = prev_clusters + num_bits;
5140        } else {
5141                ret = ocfs2_adjust_xattr_cross_cluster(inode,
5142                                                       handle,
5143                                                       first,
5144                                                       target,
5145                                                       block,
5146                                                       prev_clusters,
5147                                                       &v_start,
5148                                                       extend);
5149                if (ret) {
5150                        mlog_errno(ret);
5151                        goto leave;
5152                }
5153        }
5154
5155        trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
5156                                                 v_start, num_bits);
5157        ret = ocfs2_insert_extent(handle, &et, v_start, block,
5158                                  num_bits, 0, ctxt->meta_ac);
5159        if (ret < 0) {
5160                mlog_errno(ret);
5161                goto leave;
5162        }
5163
5164        ocfs2_journal_dirty(handle, root_bh);
5165
5166leave:
5167        return ret;
5168}
5169
5170/*
5171 * We are given an extent.  'first' is the bucket at the very front of
5172 * the extent.  The extent has space for an additional bucket past
5173 * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5174 * of the target bucket.  We wish to shift every bucket past the target
5175 * down one, filling in that additional space.  When we get back to the
5176 * target, we split the target between itself and the now-empty bucket
5177 * at target+1 (aka, target_blkno + blks_per_bucket).
5178 */
5179static int ocfs2_extend_xattr_bucket(struct inode *inode,
5180                                     handle_t *handle,
5181                                     struct ocfs2_xattr_bucket *first,
5182                                     u64 target_blk,
5183                                     u32 num_clusters)
5184{
5185        int ret, credits;
5186        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5187        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5188        u64 end_blk;
5189        u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5190
5191        trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
5192                                        (unsigned long long)bucket_blkno(first),
5193                                        num_clusters, new_bucket);
5194
5195        /* The extent must have room for an additional bucket */
5196        BUG_ON(new_bucket >=
5197               (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5198
5199        /* end_blk points to the last existing bucket */
5200        end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5201
5202        /*
5203         * end_blk is the start of the last existing bucket.
5204         * Thus, (end_blk - target_blk) covers the target bucket and
5205         * every bucket after it up to, but not including, the last
5206         * existing bucket.  Then we add the last existing bucket, the
5207         * new bucket, and the first bucket (3 * blk_per_bucket).
5208         */
5209        credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5210        ret = ocfs2_extend_trans(handle, credits);
5211        if (ret) {
5212                mlog_errno(ret);
5213                goto out;
5214        }
5215
5216        ret = ocfs2_xattr_bucket_journal_access(handle, first,
5217                                                OCFS2_JOURNAL_ACCESS_WRITE);
5218        if (ret) {
5219                mlog_errno(ret);
5220                goto out;
5221        }
5222
5223        while (end_blk != target_blk) {
5224                ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5225                                            end_blk + blk_per_bucket, 0);
5226                if (ret)
5227                        goto out;
5228                end_blk -= blk_per_bucket;
5229        }
5230
5231        /* Move half of the xattr in target_blkno to the next bucket. */
5232        ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5233                                        target_blk + blk_per_bucket, NULL, 0);
5234
5235        le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5236        ocfs2_xattr_bucket_journal_dirty(handle, first);
5237
5238out:
5239        return ret;
5240}
5241
5242/*
5243 * Add new xattr bucket in an extent record and adjust the buckets
5244 * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5245 * bucket we want to insert into.
5246 *
5247 * In the easy case, we will move all the buckets after target down by
5248 * one. Half of target's xattrs will be moved to the next bucket.
5249 *
5250 * If current cluster is full, we'll allocate a new one.  This may not
5251 * be contiguous.  The underlying calls will make sure that there is
5252 * space for the insert, shifting buckets around if necessary.
5253 * 'target' may be moved by those calls.
5254 */
5255static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5256                                      struct buffer_head *xb_bh,
5257                                      struct ocfs2_xattr_bucket *target,
5258                                      struct ocfs2_xattr_set_ctxt *ctxt)
5259{
5260        struct ocfs2_xattr_block *xb =
5261                        (struct ocfs2_xattr_block *)xb_bh->b_data;
5262        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5263        struct ocfs2_extent_list *el = &xb_root->xt_list;
5264        u32 name_hash =
5265                le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5266        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5267        int ret, num_buckets, extend = 1;
5268        u64 p_blkno;
5269        u32 e_cpos, num_clusters;
5270        /* The bucket at the front of the extent */
5271        struct ocfs2_xattr_bucket *first;
5272
5273        trace_ocfs2_add_new_xattr_bucket(
5274                                (unsigned long long)bucket_blkno(target));
5275
5276        /* The first bucket of the original extent */
5277        first = ocfs2_xattr_bucket_new(inode);
5278        if (!first) {
5279                ret = -ENOMEM;
5280                mlog_errno(ret);
5281                goto out;
5282        }
5283
5284        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5285                                  &num_clusters, el);
5286        if (ret) {
5287                mlog_errno(ret);
5288                goto out;
5289        }
5290
5291        ret = ocfs2_read_xattr_bucket(first, p_blkno);
5292        if (ret) {
5293                mlog_errno(ret);
5294                goto out;
5295        }
5296
5297        num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5298        if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5299                /*
5300                 * This can move first+target if the target bucket moves
5301                 * to the new extent.
5302                 */
5303                ret = ocfs2_add_new_xattr_cluster(inode,
5304                                                  xb_bh,
5305                                                  first,
5306                                                  target,
5307                                                  &num_clusters,
5308                                                  e_cpos,
5309                                                  &extend,
5310                                                  ctxt);
5311                if (ret) {
5312                        mlog_errno(ret);
5313                        goto out;
5314                }
5315        }
5316
5317        if (extend) {
5318                ret = ocfs2_extend_xattr_bucket(inode,
5319                                                ctxt->handle,
5320                                                first,
5321                                                bucket_blkno(target),
5322                                                num_clusters);
5323                if (ret)
5324                        mlog_errno(ret);
5325        }
5326
5327out:
5328        ocfs2_xattr_bucket_free(first);
5329
5330        return ret;
5331}
5332
5333/*
5334 * Truncate the specified xe_off entry in xattr bucket.
5335 * bucket is indicated by header_bh and len is the new length.
5336 * Both the ocfs2_xattr_value_root and the entry will be updated here.
5337 *
5338 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5339 */
5340static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5341                                             struct ocfs2_xattr_bucket *bucket,
5342                                             int xe_off,
5343                                             int len,
5344                                             struct ocfs2_xattr_set_ctxt *ctxt)
5345{
5346        int ret, offset;
5347        u64 value_blk;
5348        struct ocfs2_xattr_entry *xe;
5349        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5350        size_t blocksize = inode->i_sb->s_blocksize;
5351        struct ocfs2_xattr_value_buf vb = {
5352                .vb_access = ocfs2_journal_access,
5353        };
5354
5355        xe = &xh->xh_entries[xe_off];
5356
5357        BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5358
5359        offset = le16_to_cpu(xe->xe_name_offset) +
5360                 OCFS2_XATTR_SIZE(xe->xe_name_len);
5361
5362        value_blk = offset / blocksize;
5363
5364        /* We don't allow ocfs2_xattr_value to be stored in different block. */
5365        BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5366
5367        vb.vb_bh = bucket->bu_bhs[value_blk];
5368        BUG_ON(!vb.vb_bh);
5369
5370        vb.vb_xv = (struct ocfs2_xattr_value_root *)
5371                (vb.vb_bh->b_data + offset % blocksize);
5372
5373        /*
5374         * From here on out we have to dirty the bucket.  The generic
5375         * value calls only modify one of the bucket's bhs, but we need
5376         * to send the bucket at once.  So if they error, they *could* have
5377         * modified something.  We have to assume they did, and dirty
5378         * the whole bucket.  This leaves us in a consistent state.
5379         */
5380        trace_ocfs2_xattr_bucket_value_truncate(
5381                        (unsigned long long)bucket_blkno(bucket), xe_off, len);
5382        ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5383        if (ret) {
5384                mlog_errno(ret);
5385                goto out;
5386        }
5387
5388        ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5389                                                OCFS2_JOURNAL_ACCESS_WRITE);
5390        if (ret) {
5391                mlog_errno(ret);
5392                goto out;
5393        }
5394
5395        xe->xe_value_size = cpu_to_le64(len);
5396
5397        ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5398
5399out:
5400        return ret;
5401}
5402
5403static int ocfs2_rm_xattr_cluster(struct inode *inode,
5404                                  struct buffer_head *root_bh,
5405                                  u64 blkno,
5406                                  u32 cpos,
5407                                  u32 len,
5408                                  void *para)
5409{
5410        int ret;
5411        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5412        struct inode *tl_inode = osb->osb_tl_inode;
5413        handle_t *handle;
5414        struct ocfs2_xattr_block *xb =
5415                        (struct ocfs2_xattr_block *)root_bh->b_data;
5416        struct ocfs2_alloc_context *meta_ac = NULL;
5417        struct ocfs2_cached_dealloc_ctxt dealloc;
5418        struct ocfs2_extent_tree et;
5419
5420        ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5421                                          ocfs2_delete_xattr_in_bucket, para);
5422        if (ret) {
5423                mlog_errno(ret);
5424                return ret;
5425        }
5426
5427        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5428
5429        ocfs2_init_dealloc_ctxt(&dealloc);
5430
5431        trace_ocfs2_rm_xattr_cluster(
5432                        (unsigned long long)OCFS2_I(inode)->ip_blkno,
5433                        (unsigned long long)blkno, cpos, len);
5434
5435        ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5436                                               len);
5437
5438        ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5439        if (ret) {
5440                mlog_errno(ret);
5441                return ret;
5442        }
5443
5444        mutex_lock(&tl_inode->i_mutex);
5445
5446        if (ocfs2_truncate_log_needs_flush(osb)) {
5447                ret = __ocfs2_flush_truncate_log(osb);
5448                if (ret < 0) {
5449                        mlog_errno(ret);
5450                        goto out;
5451                }
5452        }
5453
5454        handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5455        if (IS_ERR(handle)) {
5456                ret = -ENOMEM;
5457                mlog_errno(ret);
5458                goto out;
5459        }
5460
5461        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5462                                      OCFS2_JOURNAL_ACCESS_WRITE);
5463        if (ret) {
5464                mlog_errno(ret);
5465                goto out_commit;
5466        }
5467
5468        ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5469                                  &dealloc);
5470        if (ret) {
5471                mlog_errno(ret);
5472                goto out_commit;
5473        }
5474
5475        le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5476        ocfs2_journal_dirty(handle, root_bh);
5477
5478        ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5479        if (ret)
5480                mlog_errno(ret);
5481        ocfs2_update_inode_fsync_trans(handle, inode, 0);
5482
5483out_commit:
5484        ocfs2_commit_trans(osb, handle);
5485out:
5486        ocfs2_schedule_truncate_log_flush(osb, 1);
5487
5488        mutex_unlock(&tl_inode->i_mutex);
5489
5490        if (meta_ac)
5491                ocfs2_free_alloc_context(meta_ac);
5492
5493        ocfs2_run_deallocs(osb, &dealloc);
5494
5495        return ret;
5496}
5497
5498/*
5499 * check whether the xattr bucket is filled up with the same hash value.
5500 * If we want to insert the xattr with the same hash, return -ENOSPC.
5501 * If we want to insert a xattr with different hash value, go ahead
5502 * and ocfs2_divide_xattr_bucket will handle this.
5503 */
5504static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5505                                              struct ocfs2_xattr_bucket *bucket,
5506                                              const char *name)
5507{
5508        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5509        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5510
5511        if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5512                return 0;
5513
5514        if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5515            xh->xh_entries[0].xe_name_hash) {
5516                mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5517                     "hash = %u\n",
5518                     (unsigned long long)bucket_blkno(bucket),
5519                     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5520                return -ENOSPC;
5521        }
5522
5523        return 0;
5524}
5525
5526/*
5527 * Try to set the entry in the current bucket.  If we fail, the caller
5528 * will handle getting us another bucket.
5529 */
5530static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5531                                        struct ocfs2_xattr_info *xi,
5532                                        struct ocfs2_xattr_search *xs,
5533                                        struct ocfs2_xattr_set_ctxt *ctxt)
5534{
5535        int ret;
5536        struct ocfs2_xa_loc loc;
5537
5538        trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5539
5540        ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5541                                       xs->not_found ? NULL : xs->here);
5542        ret = ocfs2_xa_set(&loc, xi, ctxt);
5543        if (!ret) {
5544                xs->here = loc.xl_entry;
5545                goto out;
5546        }
5547        if (ret != -ENOSPC) {
5548                mlog_errno(ret);
5549                goto out;
5550        }
5551
5552        /* Ok, we need space.  Let's try defragmenting the bucket. */
5553        ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5554                                        xs->bucket);
5555        if (ret) {
5556                mlog_errno(ret);
5557                goto out;
5558        }
5559
5560        ret = ocfs2_xa_set(&loc, xi, ctxt);
5561        if (!ret) {
5562                xs->here = loc.xl_entry;
5563                goto out;
5564        }
5565        if (ret != -ENOSPC)
5566                mlog_errno(ret);
5567
5568
5569out:
5570        return ret;
5571}
5572
5573static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5574                                             struct ocfs2_xattr_info *xi,
5575                                             struct ocfs2_xattr_search *xs,
5576                                             struct ocfs2_xattr_set_ctxt *ctxt)
5577{
5578        int ret;
5579
5580        trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
5581
5582        ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5583        if (!ret)
5584                goto out;
5585        if (ret != -ENOSPC) {
5586                mlog_errno(ret);
5587                goto out;
5588        }
5589
5590        /* Ack, need more space.  Let's try to get another bucket! */
5591
5592        /*
5593         * We do not allow for overlapping ranges between buckets. And
5594         * the maximum number of collisions we will allow for then is
5595         * one bucket's worth, so check it here whether we need to
5596         * add a new bucket for the insert.
5597         */
5598        ret = ocfs2_check_xattr_bucket_collision(inode,
5599                                                 xs->bucket,
5600                                                 xi->xi_name);
5601        if (ret) {
5602                mlog_errno(ret);
5603                goto out;
5604        }
5605
5606        ret = ocfs2_add_new_xattr_bucket(inode,
5607                                         xs->xattr_bh,
5608                                         xs->bucket,
5609                                         ctxt);
5610        if (ret) {
5611                mlog_errno(ret);
5612                goto out;
5613        }
5614
5615        /*
5616         * ocfs2_add_new_xattr_bucket() will have updated
5617         * xs->bucket if it moved, but it will not have updated
5618         * any of the other search fields.  Thus, we drop it and
5619         * re-search.  Everything should be cached, so it'll be
5620         * quick.
5621         */
5622        ocfs2_xattr_bucket_relse(xs->bucket);
5623        ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5624                                           xi->xi_name_index,
5625                                           xi->xi_name, xs);
5626        if (ret && ret != -ENODATA)
5627                goto out;
5628        xs->not_found = ret;
5629
5630        /* Ok, we have a new bucket, let's try again */
5631        ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5632        if (ret && (ret != -ENOSPC))
5633                mlog_errno(ret);
5634
5635out:
5636        return ret;
5637}
5638
5639static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5640                                        struct ocfs2_xattr_bucket *bucket,
5641                                        void *para)
5642{
5643        int ret = 0, ref_credits;
5644        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5645        u16 i;
5646        struct ocfs2_xattr_entry *xe;
5647        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5648        struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5649        int credits = ocfs2_remove_extent_credits(osb->sb) +
5650                ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5651        struct ocfs2_xattr_value_root *xv;
5652        struct ocfs2_rm_xattr_bucket_para *args =
5653                        (struct ocfs2_rm_xattr_bucket_para *)para;
5654
5655        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5656
5657        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5658                xe = &xh->xh_entries[i];
5659                if (ocfs2_xattr_is_local(xe))
5660                        continue;
5661
5662                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5663                                                      i, &xv, NULL);
5664                if (ret) {
5665                        mlog_errno(ret);
5666                        break;
5667                }
5668
5669                ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5670                                                         args->ref_ci,
5671                                                         args->ref_root_bh,
5672                                                         &ctxt.meta_ac,
5673                                                         &ref_credits);
5674
5675                ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5676                if (IS_ERR(ctxt.handle)) {
5677                        ret = PTR_ERR(ctxt.handle);
5678                        mlog_errno(ret);
5679                        break;
5680                }
5681
5682                ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5683                                                        i, 0, &ctxt);
5684
5685                ocfs2_commit_trans(osb, ctxt.handle);
5686                if (ctxt.meta_ac) {
5687                        ocfs2_free_alloc_context(ctxt.meta_ac);
5688                        ctxt.meta_ac = NULL;
5689                }
5690                if (ret) {
5691                        mlog_errno(ret);
5692                        break;
5693                }
5694        }
5695
5696        if (ctxt.meta_ac)
5697                ocfs2_free_alloc_context(ctxt.meta_ac);
5698        ocfs2_schedule_truncate_log_flush(osb, 1);
5699        ocfs2_run_deallocs(osb, &ctxt.dealloc);
5700        return ret;
5701}
5702
5703/*
5704 * Whenever we modify a xattr value root in the bucket(e.g, CoW
5705 * or change the extent record flag), we need to recalculate
5706 * the metaecc for the whole bucket. So it is done here.
5707 *
5708 * Note:
5709 * We have to give the extra credits for the caller.
5710 */
5711static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5712                                            handle_t *handle,
5713                                            void *para)
5714{
5715        int ret;
5716        struct ocfs2_xattr_bucket *bucket =
5717                        (struct ocfs2_xattr_bucket *)para;
5718
5719        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5720                                                OCFS2_JOURNAL_ACCESS_WRITE);
5721        if (ret) {
5722                mlog_errno(ret);
5723                return ret;
5724        }
5725
5726        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5727
5728        return 0;
5729}
5730
5731/*
5732 * Special action we need if the xattr value is refcounted.
5733 *
5734 * 1. If the xattr is refcounted, lock the tree.
5735 * 2. CoW the xattr if we are setting the new value and the value
5736 *    will be stored outside.
5737 * 3. In other case, decrease_refcount will work for us, so just
5738 *    lock the refcount tree, calculate the meta and credits is OK.
5739 *
5740 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5741 * currently CoW is a completed transaction, while this function
5742 * will also lock the allocators and let us deadlock. So we will
5743 * CoW the whole xattr value.
5744 */
5745static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5746                                        struct ocfs2_dinode *di,
5747                                        struct ocfs2_xattr_info *xi,
5748                                        struct ocfs2_xattr_search *xis,
5749                                        struct ocfs2_xattr_search *xbs,
5750                                        struct ocfs2_refcount_tree **ref_tree,
5751                                        int *meta_add,
5752                                        int *credits)
5753{
5754        int ret = 0;
5755        struct ocfs2_xattr_block *xb;
5756        struct ocfs2_xattr_entry *xe;
5757        char *base;
5758        u32 p_cluster, num_clusters;
5759        unsigned int ext_flags;
5760        int name_offset, name_len;
5761        struct ocfs2_xattr_value_buf vb;
5762        struct ocfs2_xattr_bucket *bucket = NULL;
5763        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5764        struct ocfs2_post_refcount refcount;
5765        struct ocfs2_post_refcount *p = NULL;
5766        struct buffer_head *ref_root_bh = NULL;
5767
5768        if (!xis->not_found) {
5769                xe = xis->here;
5770                name_offset = le16_to_cpu(xe->xe_name_offset);
5771                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5772                base = xis->base;
5773                vb.vb_bh = xis->inode_bh;
5774                vb.vb_access = ocfs2_journal_access_di;
5775        } else {
5776                int i, block_off = 0;
5777                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5778                xe = xbs->here;
5779                name_offset = le16_to_cpu(xe->xe_name_offset);
5780                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5781                i = xbs->here - xbs->header->xh_entries;
5782
5783                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5784                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5785                                                        bucket_xh(xbs->bucket),
5786                                                        i, &block_off,
5787                                                        &name_offset);
5788                        if (ret) {
5789                                mlog_errno(ret);
5790                                goto out;
5791                        }
5792                        base = bucket_block(xbs->bucket, block_off);
5793                        vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5794                        vb.vb_access = ocfs2_journal_access;
5795
5796                        if (ocfs2_meta_ecc(osb)) {
5797                                /*create parameters for ocfs2_post_refcount. */
5798                                bucket = xbs->bucket;
5799                                refcount.credits = bucket->bu_blocks;
5800                                refcount.para = bucket;
5801                                refcount.func =
5802                                        ocfs2_xattr_bucket_post_refcount;
5803                                p = &refcount;
5804                        }
5805                } else {
5806                        base = xbs->base;
5807                        vb.vb_bh = xbs->xattr_bh;
5808                        vb.vb_access = ocfs2_journal_access_xb;
5809                }
5810        }
5811
5812        if (ocfs2_xattr_is_local(xe))
5813                goto out;
5814
5815        vb.vb_xv = (struct ocfs2_xattr_value_root *)
5816                                (base + name_offset + name_len);
5817
5818        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5819                                       &num_clusters, &vb.vb_xv->xr_list,
5820                                       &ext_flags);
5821        if (ret) {
5822                mlog_errno(ret);
5823                goto out;
5824        }
5825
5826        /*
5827         * We just need to check the 1st extent record, since we always
5828         * CoW the whole xattr. So there shouldn't be a xattr with
5829         * some REFCOUNT extent recs after the 1st one.
5830         */
5831        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5832                goto out;
5833
5834        ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5835                                       1, ref_tree, &ref_root_bh);
5836        if (ret) {
5837                mlog_errno(ret);
5838                goto out;
5839        }
5840
5841        /*
5842         * If we are deleting the xattr or the new size will be stored inside,
5843         * cool, leave it there, the xattr truncate process will remove them
5844         * for us(it still needs the refcount tree lock and the meta, credits).
5845         * And the worse case is that every cluster truncate will split the
5846         * refcount tree, and make the original extent become 3. So we will need
5847         * 2 * cluster more extent recs at most.
5848         */
5849        if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5850
5851                ret = ocfs2_refcounted_xattr_delete_need(inode,
5852                                                         &(*ref_tree)->rf_ci,
5853                                                         ref_root_bh, vb.vb_xv,
5854                                                         meta_add, credits);
5855                if (ret)
5856                        mlog_errno(ret);
5857                goto out;
5858        }
5859
5860        ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5861                                       *ref_tree, ref_root_bh, 0,
5862                                       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5863        if (ret)
5864                mlog_errno(ret);
5865
5866out:
5867        brelse(ref_root_bh);
5868        return ret;
5869}
5870
5871/*
5872 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5873 * The physical clusters will be added to refcount tree.
5874 */
5875static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5876                                struct ocfs2_xattr_value_root *xv,
5877                                struct ocfs2_extent_tree *value_et,
5878                                struct ocfs2_caching_info *ref_ci,
5879                                struct buffer_head *ref_root_bh,
5880                                struct ocfs2_cached_dealloc_ctxt *dealloc,
5881                                struct ocfs2_post_refcount *refcount)
5882{
5883        int ret = 0;
5884        u32 clusters = le32_to_cpu(xv->xr_clusters);
5885        u32 cpos, p_cluster, num_clusters;
5886        struct ocfs2_extent_list *el = &xv->xr_list;
5887        unsigned int ext_flags;
5888
5889        cpos = 0;
5890        while (cpos < clusters) {
5891                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5892                                               &num_clusters, el, &ext_flags);
5893                if (ret) {
5894                        mlog_errno(ret);
5895                        break;
5896                }
5897
5898                cpos += num_clusters;
5899                if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5900                        continue;
5901
5902                BUG_ON(!p_cluster);
5903
5904                ret = ocfs2_add_refcount_flag(inode, value_et,
5905                                              ref_ci, ref_root_bh,
5906                                              cpos - num_clusters,
5907                                              p_cluster, num_clusters,
5908                                              dealloc, refcount);
5909                if (ret) {
5910                        mlog_errno(ret);
5911                        break;
5912                }
5913        }
5914
5915        return ret;
5916}
5917
5918/*
5919 * Given a normal ocfs2_xattr_header, refcount all the entries which
5920 * have value stored outside.
5921 * Used for xattrs stored in inode and ocfs2_xattr_block.
5922 */
5923static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5924                                struct ocfs2_xattr_value_buf *vb,
5925                                struct ocfs2_xattr_header *header,
5926                                struct ocfs2_caching_info *ref_ci,
5927                                struct buffer_head *ref_root_bh,
5928                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5929{
5930
5931        struct ocfs2_xattr_entry *xe;
5932        struct ocfs2_xattr_value_root *xv;
5933        struct ocfs2_extent_tree et;
5934        int i, ret = 0;
5935
5936        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5937                xe = &header->xh_entries[i];
5938
5939                if (ocfs2_xattr_is_local(xe))
5940                        continue;
5941
5942                xv = (struct ocfs2_xattr_value_root *)((void *)header +
5943                        le16_to_cpu(xe->xe_name_offset) +
5944                        OCFS2_XATTR_SIZE(xe->xe_name_len));
5945
5946                vb->vb_xv = xv;
5947                ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5948
5949                ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5950                                                        ref_ci, ref_root_bh,
5951                                                        dealloc, NULL);
5952                if (ret) {
5953                        mlog_errno(ret);
5954                        break;
5955                }
5956        }
5957
5958        return ret;
5959}
5960
5961static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5962                                struct buffer_head *fe_bh,
5963                                struct ocfs2_caching_info *ref_ci,
5964                                struct buffer_head *ref_root_bh,
5965                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5966{
5967        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5968        struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5969                                (fe_bh->b_data + inode->i_sb->s_blocksize -
5970                                le16_to_cpu(di->i_xattr_inline_size));
5971        struct ocfs2_xattr_value_buf vb = {
5972                .vb_bh = fe_bh,
5973                .vb_access = ocfs2_journal_access_di,
5974        };
5975
5976        return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5977                                                  ref_ci, ref_root_bh, dealloc);
5978}
5979
/*
 * Arguments carried through the xattr tree iteration to
 * ocfs2_xattr_bucket_value_refcount(), which passes all three on to
 * ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
        /* caching info of the refcount tree */
        struct ocfs2_caching_info *ref_ci;
        /* buffer of the refcount tree root */
        struct buffer_head *ref_root_bh;
        /* dealloc context handed to the refcount code */
        struct ocfs2_cached_dealloc_ctxt *dealloc;
};
5985
5986static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5987                                           struct ocfs2_xattr_bucket *bucket,
5988                                           int offset,
5989                                           struct ocfs2_xattr_value_root **xv,
5990                                           struct buffer_head **bh)
5991{
5992        int ret, block_off, name_offset;
5993        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5994        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5995        void *base;
5996
5997        ret = ocfs2_xattr_bucket_get_name_value(sb,
5998                                                bucket_xh(bucket),
5999                                                offset,
6000                                                &block_off,
6001                                                &name_offset);
6002        if (ret) {
6003                mlog_errno(ret);
6004                goto out;
6005        }
6006
6007        base = bucket_block(bucket, block_off);
6008
6009        *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6010                         OCFS2_XATTR_SIZE(xe->xe_name_len));
6011
6012        if (bh)
6013                *bh = bucket->bu_bhs[block_off];
6014out:
6015        return ret;
6016}
6017
6018/*
6019 * For a given xattr bucket, refcount all the entries which
6020 * have value stored outside.
6021 */
6022static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6023                                             struct ocfs2_xattr_bucket *bucket,
6024                                             void *para)
6025{
6026        int i, ret = 0;
6027        struct ocfs2_extent_tree et;
6028        struct ocfs2_xattr_tree_value_refcount_para *ref =
6029                        (struct ocfs2_xattr_tree_value_refcount_para *)para;
6030        struct ocfs2_xattr_header *xh =
6031                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6032        struct ocfs2_xattr_entry *xe;
6033        struct ocfs2_xattr_value_buf vb = {
6034                .vb_access = ocfs2_journal_access,
6035        };
6036        struct ocfs2_post_refcount refcount = {
6037                .credits = bucket->bu_blocks,
6038                .para = bucket,
6039                .func = ocfs2_xattr_bucket_post_refcount,
6040        };
6041        struct ocfs2_post_refcount *p = NULL;
6042
6043        /* We only need post_refcount if we support metaecc. */
6044        if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6045                p = &refcount;
6046
6047        trace_ocfs2_xattr_bucket_value_refcount(
6048                                (unsigned long long)bucket_blkno(bucket),
6049                                le16_to_cpu(xh->xh_count));
6050        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6051                xe = &xh->xh_entries[i];
6052
6053                if (ocfs2_xattr_is_local(xe))
6054                        continue;
6055
6056                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6057                                                      &vb.vb_xv, &vb.vb_bh);
6058                if (ret) {
6059                        mlog_errno(ret);
6060                        break;
6061                }
6062
6063                ocfs2_init_xattr_value_extent_tree(&et,
6064                                                   INODE_CACHE(inode), &vb);
6065
6066                ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6067                                                        &et, ref->ref_ci,
6068                                                        ref->ref_root_bh,
6069                                                        ref->dealloc, p);
6070                if (ret) {
6071                        mlog_errno(ret);
6072                        break;
6073                }
6074        }
6075
6076        return ret;
6077
6078}
6079
6080static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6081                                     struct buffer_head *root_bh,
6082                                     u64 blkno, u32 cpos, u32 len, void *para)
6083{
6084        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6085                                           ocfs2_xattr_bucket_value_refcount,
6086                                           para);
6087}
6088
6089static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6090                                struct buffer_head *blk_bh,
6091                                struct ocfs2_caching_info *ref_ci,
6092                                struct buffer_head *ref_root_bh,
6093                                struct ocfs2_cached_dealloc_ctxt *dealloc)
6094{
6095        int ret = 0;
6096        struct ocfs2_xattr_block *xb =
6097                                (struct ocfs2_xattr_block *)blk_bh->b_data;
6098
6099        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6100                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6101                struct ocfs2_xattr_value_buf vb = {
6102                        .vb_bh = blk_bh,
6103                        .vb_access = ocfs2_journal_access_xb,
6104                };
6105
6106                ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6107                                                         ref_ci, ref_root_bh,
6108                                                         dealloc);
6109        } else {
6110                struct ocfs2_xattr_tree_value_refcount_para para = {
6111                        .ref_ci = ref_ci,
6112                        .ref_root_bh = ref_root_bh,
6113                        .dealloc = dealloc,
6114                };
6115
6116                ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6117                                                ocfs2_refcount_xattr_tree_rec,
6118                                                &para);
6119        }
6120
6121        return ret;
6122}
6123
6124int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6125                                     struct buffer_head *fe_bh,
6126                                     struct ocfs2_caching_info *ref_ci,
6127                                     struct buffer_head *ref_root_bh,
6128                                     struct ocfs2_cached_dealloc_ctxt *dealloc)
6129{
6130        int ret = 0;
6131        struct ocfs2_inode_info *oi = OCFS2_I(inode);
6132        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6133        struct buffer_head *blk_bh = NULL;
6134
6135        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6136                ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6137                                                         ref_ci, ref_root_bh,
6138                                                         dealloc);
6139                if (ret) {
6140                        mlog_errno(ret);
6141                        goto out;
6142                }
6143        }
6144
6145        if (!di->i_xattr_loc)
6146                goto out;
6147
6148        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6149                                     &blk_bh);
6150        if (ret < 0) {
6151                mlog_errno(ret);
6152                goto out;
6153        }
6154
6155        ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6156                                                ref_root_bh, dealloc);
6157        if (ret)
6158                mlog_errno(ret);
6159
6160        brelse(blk_bh);
6161out:
6162
6163        return ret;
6164}
6165
/*
 * Filter callback type: called with an xattr entry, a zero return means the
 * entry must not be reflinked.
 */
typedef int should_xattr_reflinked(struct ocfs2_xattr_entry *xe);
6167/*
6168 * Store the information we need in xattr reflink.
6169 * old_bh and new_bh are inode bh for the old and new inode.
6170 */
6171struct ocfs2_xattr_reflink {
6172        struct inode *old_inode;
6173        struct inode *new_inode;
6174        struct buffer_head *old_bh;
6175        struct buffer_head *new_bh;
6176        struct ocfs2_caching_info *ref_ci;
6177        struct buffer_head *ref_root_bh;
6178        struct ocfs2_cached_dealloc_ctxt *dealloc;
6179        should_xattr_reflinked *xattr_reflinked;
6180};
6181
/*
 * Given an xattr header and the index of an entry in it, return the matching
 * value root (xv) and the buffer head that holds it (ret_bh).
 * Xattrs stored in the inode, in an xattr block and in an xattr tree each
 * have a different implementation.
 */
typedef int get_xattr_value_root(struct super_block *sb,
                                 struct buffer_head *bh,
                                 struct ocfs2_xattr_header *xh,
                                 int offset,
                                 struct ocfs2_xattr_value_root **xv,
                                 struct buffer_head **ret_bh,
                                 void *para);
6194
6195/*
6196 * Calculate all the xattr value root metadata stored in this xattr header and
6197 * credits we need if we create them from the scratch.
6198 * We use get_xattr_value_root so that all types of xattr container can use it.
6199 */
6200static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6201                                             struct buffer_head *bh,
6202                                             struct ocfs2_xattr_header *xh,
6203                                             int *metas, int *credits,
6204                                             int *num_recs,
6205                                             get_xattr_value_root *func,
6206                                             void *para)
6207{
6208        int i, ret = 0;
6209        struct ocfs2_xattr_value_root *xv;
6210        struct ocfs2_xattr_entry *xe;
6211
6212        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6213                xe = &xh->xh_entries[i];
6214                if (ocfs2_xattr_is_local(xe))
6215                        continue;
6216
6217                ret = func(sb, bh, xh, i, &xv, NULL, para);
6218                if (ret) {
6219                        mlog_errno(ret);
6220                        break;
6221                }
6222
6223                *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6224                          le16_to_cpu(xv->xr_list.l_next_free_rec);
6225
6226                *credits += ocfs2_calc_extend_credits(sb,
6227                                                &def_xv.xv.xr_list);
6228
6229                /*
6230                 * If the value is a tree with depth > 1, We don't go deep
6231                 * to the extent block, so just calculate a maximum record num.
6232                 */
6233                if (!xv->xr_list.l_tree_depth)
6234                        *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6235                else
6236                        *num_recs += ocfs2_clusters_for_bytes(sb,
6237                                                              XATTR_SIZE_MAX);
6238        }
6239
6240        return ret;
6241}
6242
6243/* Used by xattr inode and block to return the right xv and buffer_head. */
6244static int ocfs2_get_xattr_value_root(struct super_block *sb,
6245                                      struct buffer_head *bh,
6246                                      struct ocfs2_xattr_header *xh,
6247                                      int offset,
6248                                      struct ocfs2_xattr_value_root **xv,
6249                                      struct buffer_head **ret_bh,
6250                                      void *para)
6251{
6252        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6253
6254        *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6255                le16_to_cpu(xe->xe_name_offset) +
6256                OCFS2_XATTR_SIZE(xe->xe_name_len));
6257
6258        if (ret_bh)
6259                *ret_bh = bh;
6260
6261        return 0;
6262}
6263
6264/*
6265 * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6266 * It is only used for inline xattr and xattr block.
6267 */
6268static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6269                                        struct ocfs2_xattr_header *xh,
6270                                        struct buffer_head *ref_root_bh,
6271                                        int *credits,
6272                                        struct ocfs2_alloc_context **meta_ac)
6273{
6274        int ret, meta_add = 0, num_recs = 0;
6275        struct ocfs2_refcount_block *rb =
6276                        (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6277
6278        *credits = 0;
6279
6280        ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6281                                                &meta_add, credits, &num_recs,
6282                                                ocfs2_get_xattr_value_root,
6283                                                NULL);
6284        if (ret) {
6285                mlog_errno(ret);
6286                goto out;
6287        }
6288
6289        /*
6290         * We need to add/modify num_recs in refcount tree, so just calculate
6291         * an approximate number we need for refcount tree change.
6292         * Sometimes we need to split the tree, and after split,  half recs
6293         * will be moved to the new block, and a new block can only provide
6294         * half number of recs. So we multiple new blocks by 2.
6295         */
6296        num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6297        meta_add += num_recs;
6298        *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6299        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6300                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6301                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6302        else
6303                *credits += 1;
6304
6305        ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6306        if (ret)
6307                mlog_errno(ret);
6308
6309out:
6310        return ret;
6311}
6312
6313/*
6314 * Given a xattr header, reflink all the xattrs in this container.
6315 * It can be used for inode, block and bucket.
6316 *
6317 * NOTE:
6318 * Before we call this function, the caller has memcpy the xattr in
6319 * old_xh to the new_xh.
6320 *
6321 * If args.xattr_reflinked is set, call it to decide whether the xe should
6322 * be reflinked or not. If not, remove it from the new xattr header.
6323 */
6324static int ocfs2_reflink_xattr_header(handle_t *handle,
6325                                      struct ocfs2_xattr_reflink *args,
6326                                      struct buffer_head *old_bh,
6327                                      struct ocfs2_xattr_header *xh,
6328                                      struct buffer_head *new_bh,
6329                                      struct ocfs2_xattr_header *new_xh,
6330                                      struct ocfs2_xattr_value_buf *vb,
6331                                      struct ocfs2_alloc_context *meta_ac,
6332                                      get_xattr_value_root *func,
6333                                      void *para)
6334{
6335        int ret = 0, i, j;
6336        struct super_block *sb = args->old_inode->i_sb;
6337        struct buffer_head *value_bh;
6338        struct ocfs2_xattr_entry *xe, *last;
6339        struct ocfs2_xattr_value_root *xv, *new_xv;
6340        struct ocfs2_extent_tree data_et;
6341        u32 clusters, cpos, p_cluster, num_clusters;
6342        unsigned int ext_flags = 0;
6343
6344        trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6345                                         le16_to_cpu(xh->xh_count));
6346
6347        last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6348        for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6349                xe = &xh->xh_entries[i];
6350
6351                if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6352                        xe = &new_xh->xh_entries[j];
6353
6354                        le16_add_cpu(&new_xh->xh_count, -1);
6355                        if (new_xh->xh_count) {
6356                                memmove(xe, xe + 1,
6357                                        (void *)last - (void *)xe);
6358                                memset(last, 0,
6359                                       sizeof(struct ocfs2_xattr_entry));
6360                        }
6361
6362                        /*
6363                         * We don't want j to increase in the next round since
6364                         * it is already moved ahead.
6365                         */
6366                        j--;
6367                        continue;
6368                }
6369
6370                if (ocfs2_xattr_is_local(xe))
6371                        continue;
6372
6373                ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6374                if (ret) {
6375                        mlog_errno(ret);
6376                        break;
6377                }
6378
6379                ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6380                if (ret) {
6381                        mlog_errno(ret);
6382                        break;
6383                }
6384
6385                /*
6386                 * For the xattr which has l_tree_depth = 0, all the extent
6387                 * recs have already be copied to the new xh with the
6388                 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6389                 * increase the refount count int the refcount tree.
6390                 *
6391                 * For the xattr which has l_tree_depth > 0, we need
6392                 * to initialize it to the empty default value root,
6393                 * and then insert the extents one by one.
6394                 */
6395                if (xv->xr_list.l_tree_depth) {
6396                        memcpy(new_xv, &def_xv, sizeof(def_xv));
6397                        vb->vb_xv = new_xv;
6398                        vb->vb_bh = value_bh;
6399                        ocfs2_init_xattr_value_extent_tree(&data_et,
6400                                        INODE_CACHE(args->new_inode), vb);
6401                }
6402
6403                clusters = le32_to_cpu(xv->xr_clusters);
6404                cpos = 0;
6405                while (cpos < clusters) {
6406                        ret = ocfs2_xattr_get_clusters(args->old_inode,
6407                                                       cpos,
6408                                                       &p_cluster,
6409                                                       &num_clusters,
6410                                                       &xv->xr_list,
6411                                                       &ext_flags);
6412                        if (ret) {
6413                                mlog_errno(ret);
6414                                goto out;
6415                        }
6416
6417                        BUG_ON(!p_cluster);
6418
6419                        if (xv->xr_list.l_tree_depth) {
6420                                ret = ocfs2_insert_extent(handle,
6421                                                &data_et, cpos,
6422                                                ocfs2_clusters_to_blocks(
6423                                                        args->old_inode->i_sb,
6424                                                        p_cluster),
6425                                                num_clusters, ext_flags,
6426                                                meta_ac);
6427                                if (ret) {
6428                                        mlog_errno(ret);
6429                                        goto out;
6430                                }
6431                        }
6432
6433                        ret = ocfs2_increase_refcount(handle, args->ref_ci,
6434                                                      args->ref_root_bh,
6435                                                      p_cluster, num_clusters,
6436                                                      meta_ac, args->dealloc);
6437                        if (ret) {
6438                                mlog_errno(ret);
6439                                goto out;
6440                        }
6441
6442                        cpos += num_clusters;
6443                }
6444        }
6445
6446out:
6447        return ret;
6448}
6449
6450static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6451{
6452        int ret = 0, credits = 0;
6453        handle_t *handle;
6454        struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6455        struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6456        int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6457        int header_off = osb->sb->s_blocksize - inline_size;
6458        struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6459                                        (args->old_bh->b_data + header_off);
6460        struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6461                                        (args->new_bh->b_data + header_off);
6462        struct ocfs2_alloc_context *meta_ac = NULL;
6463        struct ocfs2_inode_info *new_oi;
6464        struct ocfs2_dinode *new_di;
6465        struct ocfs2_xattr_value_buf vb = {
6466                .vb_bh = args->new_bh,
6467                .vb_access = ocfs2_journal_access_di,
6468        };
6469
6470        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6471                                                  &credits, &meta_ac);
6472        if (ret) {
6473                mlog_errno(ret);
6474                goto out;
6475        }
6476
6477        handle = ocfs2_start_trans(osb, credits);
6478        if (IS_ERR(handle)) {
6479                ret = PTR_ERR(handle);
6480                mlog_errno(ret);
6481                goto out;
6482        }
6483
6484        ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6485                                      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6486        if (ret) {
6487                mlog_errno(ret);
6488                goto out_commit;
6489        }
6490
6491        memcpy(args->new_bh->b_data + header_off,
6492               args->old_bh->b_data + header_off, inline_size);
6493
6494        new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6495        new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6496
6497        ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6498                                         args->new_bh, new_xh, &vb, meta_ac,
6499                                         ocfs2_get_xattr_value_root, NULL);
6500        if (ret) {
6501                mlog_errno(ret);
6502                goto out_commit;
6503        }
6504
6505        new_oi = OCFS2_I(args->new_inode);
6506        /*
6507         * Adjust extent record count to reserve space for extended attribute.
6508         * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
6509         */
6510        if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
6511            !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
6512                struct ocfs2_extent_list *el = &new_di->id2.i_list;
6513                le16_add_cpu(&el->l_count, -(inline_size /
6514                                        sizeof(struct ocfs2_extent_rec)));
6515        }
6516        spin_lock(&new_oi->ip_lock);
6517        new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6518        new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6519        spin_unlock(&new_oi->ip_lock);
6520
6521        ocfs2_journal_dirty(handle, args->new_bh);
6522
6523out_commit:
6524        ocfs2_commit_trans(osb, handle);
6525
6526out:
6527        if (meta_ac)
6528                ocfs2_free_alloc_context(meta_ac);
6529        return ret;
6530}
6531
6532static int ocfs2_create_empty_xattr_block(struct inode *inode,
6533                                          struct buffer_head *fe_bh,
6534                                          struct buffer_head **ret_bh,
6535                                          int indexed)
6536{
6537        int ret;
6538        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6539        struct ocfs2_xattr_set_ctxt ctxt;
6540
6541        memset(&ctxt, 0, sizeof(ctxt));
6542        ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6543        if (ret < 0) {
6544                mlog_errno(ret);
6545                return ret;
6546        }
6547
6548        ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6549        if (IS_ERR(ctxt.handle)) {
6550                ret = PTR_ERR(ctxt.handle);
6551                mlog_errno(ret);
6552                goto out;
6553        }
6554
6555        trace_ocfs2_create_empty_xattr_block(
6556                                (unsigned long long)fe_bh->b_blocknr, indexed);
6557        ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6558                                       ret_bh);
6559        if (ret)
6560                mlog_errno(ret);
6561
6562        ocfs2_commit_trans(osb, ctxt.handle);
6563out:
6564        ocfs2_free_alloc_context(ctxt.meta_ac);
6565        return ret;
6566}
6567
6568static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6569                                     struct buffer_head *blk_bh,
6570                                     struct buffer_head *new_blk_bh)
6571{
6572        int ret = 0, credits = 0;
6573        handle_t *handle;
6574        struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6575        struct ocfs2_dinode *new_di;
6576        struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6577        int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6578        struct ocfs2_xattr_block *xb =
6579                        (struct ocfs2_xattr_block *)blk_bh->b_data;
6580        struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6581        struct ocfs2_xattr_block *new_xb =
6582                        (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6583        struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6584        struct ocfs2_alloc_context *meta_ac;
6585        struct ocfs2_xattr_value_buf vb = {
6586                .vb_bh = new_blk_bh,
6587                .vb_access = ocfs2_journal_access_xb,
6588        };
6589
6590        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6591                                                  &credits, &meta_ac);
6592        if (ret) {
6593                mlog_errno(ret);
6594                return ret;
6595        }
6596
6597        /* One more credits in case we need to add xattr flags in new inode. */
6598        handle = ocfs2_start_trans(osb, credits + 1);
6599        if (IS_ERR(handle)) {
6600                ret = PTR_ERR(handle);
6601                mlog_errno(ret);
6602                goto out;
6603        }
6604
6605        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6606                ret = ocfs2_journal_access_di(handle,
6607                                              INODE_CACHE(args->new_inode),
6608                                              args->new_bh,
6609                                              OCFS2_JOURNAL_ACCESS_WRITE);
6610                if (ret) {
6611                        mlog_errno(ret);
6612                        goto out_commit;
6613                }
6614        }
6615
6616        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6617                                      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6618        if (ret) {
6619                mlog_errno(ret);
6620                goto out_commit;
6621        }
6622
6623        memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6624               osb->sb->s_blocksize - header_off);
6625
6626        ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6627                                         new_blk_bh, new_xh, &vb, meta_ac,
6628                                         ocfs2_get_xattr_value_root, NULL);
6629        if (ret) {
6630                mlog_errno(ret);
6631                goto out_commit;
6632        }
6633
6634        ocfs2_journal_dirty(handle, new_blk_bh);
6635
6636        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6637                new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6638                spin_lock(&new_oi->ip_lock);
6639                new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6640                new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6641                spin_unlock(&new_oi->ip_lock);
6642
6643                ocfs2_journal_dirty(handle, args->new_bh);
6644        }
6645
6646out_commit:
6647        ocfs2_commit_trans(osb, handle);
6648
6649out:
6650        ocfs2_free_alloc_context(meta_ac);
6651        return ret;
6652}
6653
/* State shared by the helpers that reflink an indexed xattr tree. */
struct ocfs2_reflink_xattr_tree_args {
        /* The overall reflink context. */
        struct ocfs2_xattr_reflink *reflink;
        /* Buffer heads for the old and the new xattr block. */
        struct buffer_head *old_blk_bh, *new_blk_bh;
        /* Buckets used while copying from the old tree to the new one. */
        struct ocfs2_xattr_bucket *old_bucket, *new_bucket;
};
6661
6662/*
6663 * NOTE:
6664 * We have to handle the case that both old bucket and new bucket
6665 * will call this function to get the right ret_bh.
6666 * So The caller must give us the right bh.
6667 */
6668static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6669                                        struct buffer_head *bh,
6670                                        struct ocfs2_xattr_header *xh,
6671                                        int offset,
6672                                        struct ocfs2_xattr_value_root **xv,
6673                                        struct buffer_head **ret_bh,
6674                                        void *para)
6675{
6676        struct ocfs2_reflink_xattr_tree_args *args =
6677                        (struct ocfs2_reflink_xattr_tree_args *)para;
6678        struct ocfs2_xattr_bucket *bucket;
6679
6680        if (bh == args->old_bucket->bu_bhs[0])
6681                bucket = args->old_bucket;
6682        else
6683                bucket = args->new_bucket;
6684
6685        return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6686                                               xv, ret_bh);
6687}
6688
/*
 * Running totals gathered while iterating xattr buckets: metadata blocks,
 * journal credits and refcount records.
 */
struct ocfs2_value_tree_metas {
        int num_metas, credits, num_recs;
};
6694
/*
 * get_xattr_value_root implementation for metadata counting in a bucket.
 * @para is the struct ocfs2_xattr_bucket to look in; @bh and @xh are unused.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        struct ocfs2_xattr_bucket *target = para;

        return ocfs2_get_xattr_tree_value_root(sb, target, offset,
                                               xv, ret_bh);
}
6709
6710static int ocfs2_calc_value_tree_metas(struct inode *inode,
6711                                      struct ocfs2_xattr_bucket *bucket,
6712                                      void *para)
6713{
6714        struct ocfs2_value_tree_metas *metas =
6715                        (struct ocfs2_value_tree_metas *)para;
6716        struct ocfs2_xattr_header *xh =
6717                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6718
6719        /* Add the credits for this bucket first. */
6720        metas->credits += bucket->bu_blocks;
6721        return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6722                                        xh, &metas->num_metas,
6723                                        &metas->credits, &metas->num_recs,
6724                                        ocfs2_value_tree_metas_in_bucket,
6725                                        bucket);
6726}
6727
6728/*
6729 * Given a xattr extent rec starting from blkno and having len clusters,
6730 * iterate all the buckets calculate how much metadata we need for reflinking
6731 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6732 */
6733static int ocfs2_lock_reflink_xattr_rec_allocators(
6734                                struct ocfs2_reflink_xattr_tree_args *args,
6735                                struct ocfs2_extent_tree *xt_et,
6736                                u64 blkno, u32 len, int *credits,
6737                                struct ocfs2_alloc_context **meta_ac,
6738                                struct ocfs2_alloc_context **data_ac)
6739{
6740        int ret, num_free_extents;
6741        struct ocfs2_value_tree_metas metas;
6742        struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6743        struct ocfs2_refcount_block *rb;
6744
6745        memset(&metas, 0, sizeof(metas));
6746
6747        ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6748                                          ocfs2_calc_value_tree_metas, &metas);
6749        if (ret) {
6750                mlog_errno(ret);
6751                goto out;
6752        }
6753
6754        *credits = metas.credits;
6755
6756        /*
6757         * Calculate we need for refcount tree change.
6758         *
6759         * We need to add/modify num_recs in refcount tree, so just calculate
6760         * an approximate number we need for refcount tree change.
6761         * Sometimes we need to split the tree, and after split,  half recs
6762         * will be moved to the new block, and a new block can only provide
6763         * half number of recs. So we multiple new blocks by 2.
6764         * In the end, we have to add credits for modifying the already
6765         * existed refcount block.
6766         */
6767        rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6768        metas.num_recs =
6769                (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6770                 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6771        metas.num_metas += metas.num_recs;
6772        *credits += metas.num_recs +
6773                    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6774        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6775                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6776                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6777        else
6778                *credits += 1;
6779
6780        /* count in the xattr tree change. */
6781        num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6782        if (num_free_extents < 0) {
6783                ret = num_free_extents;
6784                mlog_errno(ret);
6785                goto out;
6786        }
6787
6788        if (num_free_extents < len)
6789                metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6790
6791        *credits += ocfs2_calc_extend_credits(osb->sb,
6792                                              xt_et->et_root_el);
6793
6794        if (metas.num_metas) {
6795                ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6796                                                        meta_ac);
6797                if (ret) {
6798                        mlog_errno(ret);
6799                        goto out;
6800                }
6801        }
6802
6803        if (len) {
6804                ret = ocfs2_reserve_clusters(osb, len, data_ac);
6805                if (ret)
6806                        mlog_errno(ret);
6807        }
6808out:
6809        if (ret) {
6810                if (*meta_ac) {
6811                        ocfs2_free_alloc_context(*meta_ac);
6812                        *meta_ac = NULL;
6813                }
6814        }
6815
6816        return ret;
6817}
6818
6819static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6820                                u64 blkno, u64 new_blkno, u32 clusters,
6821                                u32 *cpos, int num_buckets,
6822                                struct ocfs2_alloc_context *meta_ac,
6823                                struct ocfs2_alloc_context *data_ac,
6824                                struct ocfs2_reflink_xattr_tree_args *args)
6825{
6826        int i, j, ret = 0;
6827        struct super_block *sb = args->reflink->old_inode->i_sb;
6828        int bpb = args->old_bucket->bu_blocks;
6829        struct ocfs2_xattr_value_buf vb = {
6830                .vb_access = ocfs2_journal_access,
6831        };
6832
6833        for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6834                ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6835                if (ret) {
6836                        mlog_errno(ret);
6837                        break;
6838                }
6839
6840                ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
6841                if (ret) {
6842                        mlog_errno(ret);
6843                        break;
6844                }
6845
6846                ret = ocfs2_xattr_bucket_journal_access(handle,
6847                                                args->new_bucket,
6848                                                OCFS2_JOURNAL_ACCESS_CREATE);
6849                if (ret) {
6850                        mlog_errno(ret);
6851                        break;
6852                }
6853
6854                for (j = 0; j < bpb; j++)
6855                        memcpy(bucket_block(args->new_bucket, j),
6856                               bucket_block(args->old_bucket, j),
6857                               sb->s_blocksize);
6858
6859                /*
6860                 * Record the start cpos so that we can use it to initialize
6861                 * our xattr tree we also set the xh_num_bucket for the new
6862                 * bucket.
6863                 */
6864                if (i == 0) {
6865                        *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6866                                            xh_entries[0].xe_name_hash);
6867                        bucket_xh(args->new_bucket)->xh_num_buckets =
6868                                cpu_to_le16(num_buckets);
6869                }
6870
6871                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6872
6873                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6874                                        args->old_bucket->bu_bhs[0],
6875                                        bucket_xh(args->old_bucket),
6876                                        args->new_bucket->bu_bhs[0],
6877                                        bucket_xh(args->new_bucket),
6878                                        &vb, meta_ac,
6879                                        ocfs2_get_reflink_xattr_value_root,
6880                                        args);
6881                if (ret) {
6882                        mlog_errno(ret);
6883                        break;
6884                }
6885
6886                /*
6887                 * Re-access and dirty the bucket to calculate metaecc.
6888                 * Because we may extend the transaction in reflink_xattr_header
6889                 * which will let the already accessed block gone.
6890                 */
6891                ret = ocfs2_xattr_bucket_journal_access(handle,
6892                                                args->new_bucket,
6893                                                OCFS2_JOURNAL_ACCESS_WRITE);
6894                if (ret) {
6895                        mlog_errno(ret);
6896                        break;
6897                }
6898
6899                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6900
6901                ocfs2_xattr_bucket_relse(args->old_bucket);
6902                ocfs2_xattr_bucket_relse(args->new_bucket);
6903        }
6904
6905        ocfs2_xattr_bucket_relse(args->old_bucket);
6906        ocfs2_xattr_bucket_relse(args->new_bucket);
6907        return ret;
6908}
6909
6910static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6911                                struct inode *inode,
6912                                struct ocfs2_reflink_xattr_tree_args *args,
6913                                struct ocfs2_extent_tree *et,
6914                                struct ocfs2_alloc_context *meta_ac,
6915                                struct ocfs2_alloc_context *data_ac,
6916                                u64 blkno, u32 cpos, u32 len)
6917{
6918        int ret, first_inserted = 0;
6919        u32 p_cluster, num_clusters, reflink_cpos = 0;
6920        u64 new_blkno;
6921        unsigned int num_buckets, reflink_buckets;
6922        unsigned int bpc =
6923                ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6924
6925        ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6926        if (ret) {
6927                mlog_errno(ret);
6928                goto out;
6929        }
6930        num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6931        ocfs2_xattr_bucket_relse(args->old_bucket);
6932
6933        while (len && num_buckets) {
6934                ret = ocfs2_claim_clusters(handle, data_ac,
6935                                           1, &p_cluster, &num_clusters);
6936                if (ret) {
6937                        mlog_errno(ret);
6938                        goto out;
6939                }
6940
6941                new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6942                reflink_buckets = min(num_buckets, bpc * num_clusters);
6943
6944                ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6945                                                 new_blkno, num_clusters,
6946                                                 &reflink_cpos, reflink_buckets,
6947                                                 meta_ac, data_ac, args);
6948                if (ret) {
6949                        mlog_errno(ret);
6950                        goto out;
6951                }
6952
6953                /*
6954                 * For the 1st allocated cluster, we make it use the same cpos
6955                 * so that the xattr tree looks the same as the original one
6956                 * in the most case.
6957                 */
6958                if (!first_inserted) {
6959                        reflink_cpos = cpos;
6960                        first_inserted = 1;
6961                }
6962                ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6963                                          num_clusters, 0, meta_ac);
6964                if (ret)
6965                        mlog_errno(ret);
6966
6967                trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
6968                                                  num_clusters, reflink_cpos);
6969
6970                len -= num_clusters;
6971                blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6972                num_buckets -= reflink_buckets;
6973        }
6974out:
6975        return ret;
6976}
6977
6978/*
6979 * Create the same xattr extent record in the new inode's xattr tree.
6980 */
6981static int ocfs2_reflink_xattr_rec(struct inode *inode,
6982                                   struct buffer_head *root_bh,
6983                                   u64 blkno,
6984                                   u32 cpos,
6985                                   u32 len,
6986                                   void *para)
6987{
6988        int ret, credits = 0;
6989        handle_t *handle;
6990        struct ocfs2_reflink_xattr_tree_args *args =
6991                        (struct ocfs2_reflink_xattr_tree_args *)para;
6992        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6993        struct ocfs2_alloc_context *meta_ac = NULL;
6994        struct ocfs2_alloc_context *data_ac = NULL;
6995        struct ocfs2_extent_tree et;
6996
6997        trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
6998
6999        ocfs2_init_xattr_tree_extent_tree(&et,
7000                                          INODE_CACHE(args->reflink->new_inode),
7001                                          args->new_blk_bh);
7002
7003        ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7004                                                      len, &credits,
7005                                                      &meta_ac, &data_ac);
7006        if (ret) {
7007                mlog_errno(ret);
7008                goto out;
7009        }
7010
7011        handle = ocfs2_start_trans(osb, credits);
7012        if (IS_ERR(handle)) {
7013                ret = PTR_ERR(handle);
7014                mlog_errno(ret);
7015                goto out;
7016        }
7017
7018        ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
7019                                          meta_ac, data_ac,
7020                                          blkno, cpos, len);
7021        if (ret)
7022                mlog_errno(ret);
7023
7024        ocfs2_commit_trans(osb, handle);
7025
7026out:
7027        if (meta_ac)
7028                ocfs2_free_alloc_context(meta_ac);
7029        if (data_ac)
7030                ocfs2_free_alloc_context(data_ac);
7031        return ret;
7032}
7033
7034/*
7035 * Create reflinked xattr buckets.
7036 * We will add bucket one by one, and refcount all the xattrs in the bucket
7037 * if they are stored outside.
7038 */
7039static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7040                                    struct buffer_head *blk_bh,
7041                                    struct buffer_head *new_blk_bh)
7042{
7043        int ret;
7044        struct ocfs2_reflink_xattr_tree_args para;
7045
7046        memset(&para, 0, sizeof(para));
7047        para.reflink = args;
7048        para.old_blk_bh = blk_bh;
7049        para.new_blk_bh = new_blk_bh;
7050
7051        para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7052        if (!para.old_bucket) {
7053                mlog_errno(-ENOMEM);
7054                return -ENOMEM;
7055        }
7056
7057        para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7058        if (!para.new_bucket) {
7059                ret = -ENOMEM;
7060                mlog_errno(ret);
7061                goto out;
7062        }
7063
7064        ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7065                                              ocfs2_reflink_xattr_rec,
7066                                              &para);
7067        if (ret)
7068                mlog_errno(ret);
7069
7070out:
7071        ocfs2_xattr_bucket_free(para.old_bucket);
7072        ocfs2_xattr_bucket_free(para.new_bucket);
7073        return ret;
7074}
7075
7076static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7077                                        struct buffer_head *blk_bh)
7078{
7079        int ret, indexed = 0;
7080        struct buffer_head *new_blk_bh = NULL;
7081        struct ocfs2_xattr_block *xb =
7082                        (struct ocfs2_xattr_block *)blk_bh->b_data;
7083
7084
7085        if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7086                indexed = 1;
7087
7088        ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7089                                             &new_blk_bh, indexed);
7090        if (ret) {
7091                mlog_errno(ret);
7092                goto out;
7093        }
7094
7095        if (!indexed)
7096                ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7097        else
7098                ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7099        if (ret)
7100                mlog_errno(ret);
7101
7102out:
7103        brelse(new_blk_bh);
7104        return ret;
7105}
7106
7107static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7108{
7109        int type = ocfs2_xattr_get_type(xe);
7110
7111        return type != OCFS2_XATTR_INDEX_SECURITY &&
7112               type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7113               type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7114}
7115
7116int ocfs2_reflink_xattrs(struct inode *old_inode,
7117                         struct buffer_head *old_bh,
7118                         struct inode *new_inode,
7119                         struct buffer_head *new_bh,
7120                         bool preserve_security)
7121{
7122        int ret;
7123        struct ocfs2_xattr_reflink args;
7124        struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7125        struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7126        struct buffer_head *blk_bh = NULL;
7127        struct ocfs2_cached_dealloc_ctxt dealloc;
7128        struct ocfs2_refcount_tree *ref_tree;
7129        struct buffer_head *ref_root_bh = NULL;
7130
7131        ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7132                                       le64_to_cpu(di->i_refcount_loc),
7133                                       1, &ref_tree, &ref_root_bh);
7134        if (ret) {
7135                mlog_errno(ret);
7136                goto out;
7137        }
7138
7139        ocfs2_init_dealloc_ctxt(&dealloc);
7140
7141        args.old_inode = old_inode;
7142        args.new_inode = new_inode;
7143        args.old_bh = old_bh;
7144        args.new_bh = new_bh;
7145        args.ref_ci = &ref_tree->rf_ci;
7146        args.ref_root_bh = ref_root_bh;
7147        args.dealloc = &dealloc;
7148        if (preserve_security)
7149                args.xattr_reflinked = NULL;
7150        else
7151                args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7152
7153        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7154                ret = ocfs2_reflink_xattr_inline(&args);
7155                if (ret) {
7156                        mlog_errno(ret);
7157                        goto out_unlock;
7158                }
7159        }
7160
7161        if (!di->i_xattr_loc)
7162                goto out_unlock;
7163
7164        ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7165                                     &blk_bh);
7166        if (ret < 0) {
7167                mlog_errno(ret);
7168                goto out_unlock;
7169        }
7170
7171        ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7172        if (ret)
7173                mlog_errno(ret);
7174
7175        brelse(blk_bh);
7176
7177out_unlock:
7178        ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7179                                   ref_tree, 1);
7180        brelse(ref_root_bh);
7181
7182        if (ocfs2_dealloc_has_cluster(&dealloc)) {
7183                ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7184                ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7185        }
7186
7187out:
7188        return ret;
7189}
7190
7191/*
7192 * Initialize security and acl for a already created inode.
7193 * Used for reflink a non-preserve-security file.
7194 *
7195 * It uses common api like ocfs2_xattr_set, so the caller
7196 * must not hold any lock expect i_mutex.
7197 */
7198int ocfs2_init_security_and_acl(struct inode *dir,
7199                                struct inode *inode,
7200                                const struct qstr *qstr,
7201                                struct posix_acl *default_acl,
7202                                struct posix_acl *acl)
7203{
7204        struct buffer_head *dir_bh = NULL;
7205        int ret = 0;
7206
7207        ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7208        if (ret) {
7209                mlog_errno(ret);
7210                goto leave;
7211        }
7212
7213        ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7214        if (ret) {
7215                mlog_errno(ret);
7216                goto leave;
7217        }
7218
7219        if (!ret && default_acl)
7220                ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
7221        if (!ret && acl)
7222                ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS);
7223
7224        ocfs2_inode_unlock(dir, 0);
7225        brelse(dir_bh);
7226leave:
7227        return ret;
7228}
7229/*
7230 * 'security' attributes support
7231 */
7232static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7233                                        size_t list_size, const char *name,
7234                                        size_t name_len, int type)
7235{
7236        const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7237        const size_t total_len = prefix_len + name_len + 1;
7238
7239        if (list && total_len <= list_size) {
7240                memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7241                memcpy(list + prefix_len, name, name_len);
7242                list[prefix_len + name_len] = '\0';
7243        }
7244        return total_len;
7245}
7246
7247static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7248                                    void *buffer, size_t size, int type)
7249{
7250        if (strcmp(name, "") == 0)
7251                return -EINVAL;
7252        return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
7253                               name, buffer, size);
7254}
7255
7256static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7257                const void *value, size_t size, int flags, int type)
7258{
7259        if (strcmp(name, "") == 0)
7260                return -EINVAL;
7261
7262        return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
7263                               name, value, size, flags);
7264}
7265
7266static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
7267                     void *fs_info)
7268{
7269        const struct xattr *xattr;
7270        int err = 0;
7271
7272        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
7273                err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7274                                      xattr->name, xattr->value,
7275                                      xattr->value_len, XATTR_CREATE);
7276                if (err)
7277                        break;
7278        }
7279        return err;
7280}
7281
7282int ocfs2_init_security_get(struct inode *inode,
7283                            struct inode *dir,
7284                            const struct qstr *qstr,
7285                            struct ocfs2_security_xattr_info *si)
7286{
7287        /* check whether ocfs2 support feature xattr */
7288        if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7289                return -EOPNOTSUPP;
7290        if (si)
7291                return security_old_inode_init_security(inode, dir, qstr,
7292                                                        &si->name, &si->value,
7293                                                        &si->value_len);
7294
7295        return security_inode_init_security(inode, dir, qstr,
7296                                            &ocfs2_initxattrs, NULL);
7297}
7298
7299int ocfs2_init_security_set(handle_t *handle,
7300                            struct inode *inode,
7301                            struct buffer_head *di_bh,
7302                            struct ocfs2_security_xattr_info *si,
7303                            struct ocfs2_alloc_context *xattr_ac,
7304                            struct ocfs2_alloc_context *data_ac)
7305{
7306        return ocfs2_xattr_set_handle(handle, inode, di_bh,
7307                                     OCFS2_XATTR_INDEX_SECURITY,
7308                                     si->name, si->value, si->value_len, 0,
7309                                     xattr_ac, data_ac);
7310}
7311
7312const struct xattr_handler ocfs2_xattr_security_handler = {
7313        .prefix = XATTR_SECURITY_PREFIX,
7314        .list   = ocfs2_xattr_security_list,
7315        .get    = ocfs2_xattr_security_get,
7316        .set    = ocfs2_xattr_security_set,
7317};
7318
7319/*
7320 * 'trusted' attributes support
7321 */
7322static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7323                                       size_t list_size, const char *name,
7324                                       size_t name_len, int type)
7325{
7326        const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7327        const size_t total_len = prefix_len + name_len + 1;
7328
7329        if (!capable(CAP_SYS_ADMIN))
7330                return 0;
7331
7332        if (list && total_len <= list_size) {
7333                memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7334                memcpy(list + prefix_len, name, name_len);
7335                list[prefix_len + name_len] = '\0';
7336        }
7337        return total_len;
7338}
7339
7340static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7341                void *buffer, size_t size, int type)
7342{
7343        if (strcmp(name, "") == 0)
7344                return -EINVAL;
7345        return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
7346                               name, buffer, size);
7347}
7348
7349static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7350                const void *value, size_t size, int flags, int type)
7351{
7352        if (strcmp(name, "") == 0)
7353                return -EINVAL;
7354
7355        return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
7356                               name, value, size, flags);
7357}
7358
7359const struct xattr_handler ocfs2_xattr_trusted_handler = {
7360        .prefix = XATTR_TRUSTED_PREFIX,
7361        .list   = ocfs2_xattr_trusted_list,
7362        .get    = ocfs2_xattr_trusted_get,
7363        .set    = ocfs2_xattr_trusted_set,
7364};
7365
7366/*
7367 * 'user' attributes support
7368 */
7369static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7370                                    size_t list_size, const char *name,
7371                                    size_t name_len, int type)
7372{
7373        const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7374        const size_t total_len = prefix_len + name_len + 1;
7375        struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7376
7377        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7378                return 0;
7379
7380        if (list && total_len <= list_size) {
7381                memcpy(list, XATTR_USER_PREFIX, prefix_len);
7382                memcpy(list + prefix_len, name, name_len);
7383                list[prefix_len + name_len] = '\0';
7384        }
7385        return total_len;
7386}
7387
7388static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7389                void *buffer, size_t size, int type)
7390{
7391        struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7392
7393        if (strcmp(name, "") == 0)
7394                return -EINVAL;
7395        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7396                return -EOPNOTSUPP;
7397        return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name,
7398                               buffer, size);
7399}
7400
7401static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7402                const void *value, size_t size, int flags, int type)
7403{
7404        struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7405
7406        if (strcmp(name, "") == 0)
7407                return -EINVAL;
7408        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7409                return -EOPNOTSUPP;
7410
7411        return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER,
7412                               name, value, size, flags);
7413}
7414
7415const struct xattr_handler ocfs2_xattr_user_handler = {
7416        .prefix = XATTR_USER_PREFIX,
7417        .list   = ocfs2_xattr_user_list,
7418        .get    = ocfs2_xattr_user_get,
7419        .set    = ocfs2_xattr_user_set,
7420};
7421