linux/fs/ocfs2/xattr.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * xattr.c
   5 *
   6 * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
   7 *
   8 * CREDITS:
 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
  10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  11 *
  12 * This program is free software; you can redistribute it and/or
  13 * modify it under the terms of the GNU General Public
  14 * License version 2 as published by the Free Software Foundation.
  15 *
  16 * This program is distributed in the hope that it will be useful,
  17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19 * General Public License for more details.
  20 */
  21
  22#include <linux/capability.h>
  23#include <linux/fs.h>
  24#include <linux/types.h>
  25#include <linux/slab.h>
  26#include <linux/highmem.h>
  27#include <linux/pagemap.h>
  28#include <linux/uio.h>
  29#include <linux/sched.h>
  30#include <linux/splice.h>
  31#include <linux/mount.h>
  32#include <linux/writeback.h>
  33#include <linux/falloc.h>
  34#include <linux/sort.h>
  35#include <linux/init.h>
  36#include <linux/module.h>
  37#include <linux/string.h>
  38#include <linux/security.h>
  39
  40#include <cluster/masklog.h>
  41
  42#include "ocfs2.h"
  43#include "alloc.h"
  44#include "blockcheck.h"
  45#include "dlmglue.h"
  46#include "file.h"
  47#include "symlink.h"
  48#include "sysfile.h"
  49#include "inode.h"
  50#include "journal.h"
  51#include "ocfs2_fs.h"
  52#include "suballoc.h"
  53#include "uptodate.h"
  54#include "buffer_head_io.h"
  55#include "super.h"
  56#include "xattr.h"
  57#include "refcounttree.h"
  58#include "acl.h"
  59#include "ocfs2_trace.h"
  60
/*
 * A value root followed directly by a single extent record.
 * sizeof() of this structure is what OCFS2_XATTR_ROOT_SIZE evaluates to.
 */
struct ocfs2_xattr_def_value_root {
        struct ocfs2_xattr_value_root   xv;
        struct ocfs2_extent_rec         er;
};
  65
/*
 * In-memory description of one xattr bucket: the inode it belongs to and
 * the buffer_heads for each of its blocks.
 */
struct ocfs2_xattr_bucket {
        /* The inode these xattrs are associated with */
        struct inode *bu_inode;

        /*
         * The actual buffers that make up the bucket.  The helpers in
         * this file only walk the first bu_blocks slots.
         */
        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

        /* How many blocks make up one bucket for this filesystem */
        int bu_blocks;
};
  76
/*
 * State handed to the xattr modification helpers in this file.
 */
struct ocfs2_xattr_set_ctxt {
        /* Journal handle the modification runs under */
        handle_t *handle;
        /* Allocation contexts passed on to the extent tree add/remove calls */
        struct ocfs2_alloc_context *meta_ac;
        struct ocfs2_alloc_context *data_ac;
        /* Receives clusters released while value extents are removed */
        struct ocfs2_cached_dealloc_ctxt dealloc;
        /* NOTE(review): usage is not visible in this part of the file */
        int set_abort;
};
  84
/* Size of a value root that carries one extent record */
#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Values longer than this many bytes are accounted as a value root
 * (OCFS2_XATTR_ROOT_SIZE) instead of as inline value bytes; see
 * namevalue_size().
 */
#define OCFS2_XATTR_INLINE_SIZE 80
/* Bytes subtracted, together with the header, in the free-space macros below */
#define OCFS2_XATTR_HEADER_GAP  4
/* OCFS2_MIN_XATTR_INLINE_SIZE less the xattr header and the header gap */
#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - OCFS2_XATTR_HEADER_GAP)
/*
 * One filesystem block less the xattr block structure, the xattr header
 * and the header gap.  ptr must provide ->i_sb (callers pass an inode).
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
                                         - sizeof(struct ocfs2_xattr_block) \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - OCFS2_XATTR_HEADER_GAP)
  95
/*
 * Statically initialized value root whose extent list advertises room
 * for one record (l_count == 1), matching the single 'er' slot of
 * struct ocfs2_xattr_def_value_root.
 * NOTE(review): presumably the template for new out-of-line values -
 * confirm against its users later in the file.
 */
static struct ocfs2_xattr_def_value_root def_xv = {
        .xv.xr_list.l_count = cpu_to_le16(1),
};
  99
/*
 * NULL-terminated list of the xattr handlers ocfs2 provides.
 * NOTE(review): exported (non-static); presumably installed as the
 * superblock's handler table elsewhere - confirm.
 */
const struct xattr_handler *ocfs2_xattr_handlers[] = {
        &ocfs2_xattr_user_handler,
        &posix_acl_access_xattr_handler,
        &posix_acl_default_xattr_handler,
        &ocfs2_xattr_trusted_handler,
        &ocfs2_xattr_security_handler,
        NULL
};
 108
/*
 * Lookup table from an OCFS2_XATTR_INDEX_* value to its handler.
 * Slots without an initializer stay NULL; ocfs2_xattr_prefix() checks
 * for that before using an entry.
 */
static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
        [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
        [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
                                        = &posix_acl_access_xattr_handler,
        [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
                                        = &posix_acl_default_xattr_handler,
        [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
        [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
};
 118
/* Describes one xattr name/value pair. */
struct ocfs2_xattr_info {
        /* presumably one of the OCFS2_XATTR_INDEX_* values - confirm */
        int             xi_name_index;
        /* Attribute name and its length in bytes */
        const char      *xi_name;
        int             xi_name_len;
        /* Attribute value and its length in bytes */
        const void      *xi_value;
        size_t          xi_value_len;
};
 126
/*
 * Bookkeeping for an xattr lookup.
 * NOTE(review): the code that fills this in is outside this part of the
 * file; the field notes below other than xattr_bh are assumptions.
 */
struct ocfs2_xattr_search {
        struct buffer_head *inode_bh;
        /*
         * xattr_bh points to the buffer head of the block that holds the
         * extended attribute.  When the attribute is stored in the inode,
         * xattr_bh is equal to inode_bh.
         */
        struct buffer_head *xattr_bh;
        struct ocfs2_xattr_header *header;
        struct ocfs2_xattr_bucket *bucket;
        /* presumably the bounds of the searched storage - confirm */
        void *base;
        void *end;
        /* presumably the matching entry, when one was found - confirm */
        struct ocfs2_xattr_entry *here;
        int not_found;
};
 141
/*
 * Operations on struct ocfs2_xa_loc.  Every callback takes the location
 * it acts on, so a location can be backed by different kinds of storage.
 */
struct ocfs2_xa_loc;
struct ocfs2_xa_loc_operations {
        /*
         * Journal functions
         */
        int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
                                  int type);
        void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);

        /*
         * Return a pointer to the appropriate buffer in loc->xl_storage
         * at the given offset from loc->xl_header.
         */
        void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);

        /* Can we reuse the existing entry for the new value? */
        int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
                             struct ocfs2_xattr_info *xi);

        /* How much space is needed for the new value? */
        int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
                               struct ocfs2_xattr_info *xi);

        /*
         * Return the offset of the first name+value pair.  This is
         * the start of our downward-filling free space.
         */
        int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);

        /*
         * Remove the name+value at this location.  Do whatever is
         * appropriate with the remaining name+value pairs.
         */
        void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);

        /* Fill xl_entry with a new entry */
        void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);

        /* Add name+value storage to an entry */
        void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);

        /*
         * Initialize the value buf's access and bh fields for this entry.
         * ocfs2_xa_fill_value_buf() will handle the xv pointer.
         */
        void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
                                   struct ocfs2_xattr_value_buf *vb);
};
 191
/*
 * Describes an xattr entry location.  This is an in-memory structure
 * that tracks the on-disk structure; it is never written to disk itself.
 */
struct ocfs2_xa_loc {
        /* This xattr belongs to this inode */
        struct inode *xl_inode;

        /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
        struct ocfs2_xattr_header *xl_header;

        /* Bytes from xl_header to the end of the storage */
        int xl_size;

        /*
         * The ocfs2_xattr_entry this location describes.  If this is
         * NULL, this location describes the on-disk structure where it
         * would have been.
         */
        struct ocfs2_xattr_entry *xl_entry;

        /*
         * Internal housekeeping
         */

        /*
         * Buffer(s) containing this entry.  Untyped here; the xl_ops
         * callbacks are the ones that interpret it.
         */
        void *xl_storage;

        /* Operations on the storage backing this location */
        const struct ocfs2_xa_loc_operations *xl_ops;
};
 223
 224/*
 225 * Convenience functions to calculate how much space is needed for a
 226 * given name+value pair
 227 */
 228static int namevalue_size(int name_len, uint64_t value_len)
 229{
 230        if (value_len > OCFS2_XATTR_INLINE_SIZE)
 231                return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 232        else
 233                return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
 234}
 235
 236static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
 237{
 238        return namevalue_size(xi->xi_name_len, xi->xi_value_len);
 239}
 240
 241static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
 242{
 243        u64 value_len = le64_to_cpu(xe->xe_value_size);
 244
 245        BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
 246               ocfs2_xattr_is_local(xe));
 247        return namevalue_size(xe->xe_name_len, value_len);
 248}
 249
 250
/*
 * Forward declarations of static helpers that are used before their
 * definitions in this file.
 */
static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
                                             struct ocfs2_xattr_header *xh,
                                             int index,
                                             int *block_off,
                                             int *new_offset);

static int ocfs2_xattr_block_find(struct inode *inode,
                                  int name_index,
                                  const char *name,
                                  struct ocfs2_xattr_search *xs);
static int ocfs2_xattr_index_block_find(struct inode *inode,
                                        struct buffer_head *root_bh,
                                        int name_index,
                                        const char *name,
                                        struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
                                        struct buffer_head *blk_bh,
                                        char *buffer,
                                        size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
                                          struct ocfs2_xattr_search *xs,
                                          struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
                                             struct ocfs2_xattr_info *xi,
                                             struct ocfs2_xattr_search *xs,
                                             struct ocfs2_xattr_set_ctxt *ctxt);

/*
 * Callback type taken by ocfs2_iterate_xattr_index_block(); 'para' is an
 * opaque argument handed through to the callback.
 */
typedef int (xattr_tree_rec_func)(struct inode *inode,
                                  struct buffer_head *root_bh,
                                  u64 blkno, u32 cpos, u32 len, void *para);
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
                                           struct buffer_head *root_bh,
                                           xattr_tree_rec_func *rec_func,
                                           void *para);
static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
                                        struct ocfs2_xattr_bucket *bucket,
                                        void *para);
/* Has the same parameter list as xattr_tree_rec_func */
static int ocfs2_rm_xattr_cluster(struct inode *inode,
                                  struct buffer_head *root_bh,
                                  u64 blkno,
                                  u32 cpos,
                                  u32 len,
                                  void *para);

static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
                                  u64 src_blk, u64 last_blk, u64 to_blk,
                                  unsigned int start_bucket,
                                  u32 *first_hash);
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
                                        struct ocfs2_dinode *di,
                                        struct ocfs2_xattr_info *xi,
                                        struct ocfs2_xattr_search *xis,
                                        struct ocfs2_xattr_search *xbs,
                                        struct ocfs2_refcount_tree **ref_tree,
                                        int *meta_need,
                                        int *credits);
static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
                                           struct ocfs2_xattr_bucket *bucket,
                                           int offset,
                                           struct ocfs2_xattr_value_root **xv,
                                           struct buffer_head **bh);
 315
 316static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 317{
 318        return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
 319}
 320
 321static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
 322{
 323        return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
 324}
 325
/*
 * Accessors for a bucket whose buffer_heads are populated:
 *   bucket_blkno() - block number of the bucket's first block
 *   bucket_block() - data of the bucket's _n-th block
 *   bucket_xh()    - the first block's data viewed as the xattr header
 */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 329
 330static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
 331{
 332        struct ocfs2_xattr_bucket *bucket;
 333        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 334
 335        BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
 336
 337        bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
 338        if (bucket) {
 339                bucket->bu_inode = inode;
 340                bucket->bu_blocks = blks;
 341        }
 342
 343        return bucket;
 344}
 345
 346static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
 347{
 348        int i;
 349
 350        for (i = 0; i < bucket->bu_blocks; i++) {
 351                brelse(bucket->bu_bhs[i]);
 352                bucket->bu_bhs[i] = NULL;
 353        }
 354}
 355
 356static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
 357{
 358        if (bucket) {
 359                ocfs2_xattr_bucket_relse(bucket);
 360                bucket->bu_inode = NULL;
 361                kfree(bucket);
 362        }
 363}
 364
 365/*
 366 * A bucket that has never been written to disk doesn't need to be
 367 * read.  We just need the buffer_heads.  Don't call this for
 368 * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
 369 * them fully.
 370 */
 371static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 372                                   u64 xb_blkno, int new)
 373{
 374        int i, rc = 0;
 375
 376        for (i = 0; i < bucket->bu_blocks; i++) {
 377                bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
 378                                              xb_blkno + i);
 379                if (!bucket->bu_bhs[i]) {
 380                        rc = -ENOMEM;
 381                        mlog_errno(rc);
 382                        break;
 383                }
 384
 385                if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 386                                           bucket->bu_bhs[i])) {
 387                        if (new)
 388                                ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 389                                                              bucket->bu_bhs[i]);
 390                        else {
 391                                set_buffer_uptodate(bucket->bu_bhs[i]);
 392                                ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 393                                                          bucket->bu_bhs[i]);
 394                        }
 395                }
 396        }
 397
 398        if (rc)
 399                ocfs2_xattr_bucket_relse(bucket);
 400        return rc;
 401}
 402
 403/* Read the xattr bucket at xb_blkno */
 404static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 405                                   u64 xb_blkno)
 406{
 407        int rc;
 408
 409        rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
 410                               bucket->bu_blocks, bucket->bu_bhs, 0,
 411                               NULL);
 412        if (!rc) {
 413                spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 414                rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
 415                                                 bucket->bu_bhs,
 416                                                 bucket->bu_blocks,
 417                                                 &bucket_xh(bucket)->xh_check);
 418                spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 419                if (rc)
 420                        mlog_errno(rc);
 421        }
 422
 423        if (rc)
 424                ocfs2_xattr_bucket_relse(bucket);
 425        return rc;
 426}
 427
 428static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 429                                             struct ocfs2_xattr_bucket *bucket,
 430                                             int type)
 431{
 432        int i, rc = 0;
 433
 434        for (i = 0; i < bucket->bu_blocks; i++) {
 435                rc = ocfs2_journal_access(handle,
 436                                          INODE_CACHE(bucket->bu_inode),
 437                                          bucket->bu_bhs[i], type);
 438                if (rc) {
 439                        mlog_errno(rc);
 440                        break;
 441                }
 442        }
 443
 444        return rc;
 445}
 446
 447static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 448                                             struct ocfs2_xattr_bucket *bucket)
 449{
 450        int i;
 451
 452        spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 453        ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
 454                                   bucket->bu_bhs, bucket->bu_blocks,
 455                                   &bucket_xh(bucket)->xh_check);
 456        spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 457
 458        for (i = 0; i < bucket->bu_blocks; i++)
 459                ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 460}
 461
 462static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
 463                                         struct ocfs2_xattr_bucket *src)
 464{
 465        int i;
 466        int blocksize = src->bu_inode->i_sb->s_blocksize;
 467
 468        BUG_ON(dest->bu_blocks != src->bu_blocks);
 469        BUG_ON(dest->bu_inode != src->bu_inode);
 470
 471        for (i = 0; i < src->bu_blocks; i++) {
 472                memcpy(bucket_block(dest, i), bucket_block(src, i),
 473                       blocksize);
 474        }
 475}
 476
 477static int ocfs2_validate_xattr_block(struct super_block *sb,
 478                                      struct buffer_head *bh)
 479{
 480        int rc;
 481        struct ocfs2_xattr_block *xb =
 482                (struct ocfs2_xattr_block *)bh->b_data;
 483
 484        trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
 485
 486        BUG_ON(!buffer_uptodate(bh));
 487
 488        /*
 489         * If the ecc fails, we return the error but otherwise
 490         * leave the filesystem running.  We know any error is
 491         * local to this block.
 492         */
 493        rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
 494        if (rc)
 495                return rc;
 496
 497        /*
 498         * Errors after here are fatal
 499         */
 500
 501        if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
 502                return ocfs2_error(sb,
 503                                   "Extended attribute block #%llu has bad signature %.*s\n",
 504                                   (unsigned long long)bh->b_blocknr, 7,
 505                                   xb->xb_signature);
 506        }
 507
 508        if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
 509                return ocfs2_error(sb,
 510                                   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
 511                                   (unsigned long long)bh->b_blocknr,
 512                                   (unsigned long long)le64_to_cpu(xb->xb_blkno));
 513        }
 514
 515        if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
 516                return ocfs2_error(sb,
 517                                   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
 518                                   (unsigned long long)bh->b_blocknr,
 519                                   le32_to_cpu(xb->xb_fs_generation));
 520        }
 521
 522        return 0;
 523}
 524
 525static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
 526                                  struct buffer_head **bh)
 527{
 528        int rc;
 529        struct buffer_head *tmp = *bh;
 530
 531        rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
 532                              ocfs2_validate_xattr_block);
 533
 534        /* If ocfs2_read_block() got us a new bh, pass it up. */
 535        if (!rc && !*bh)
 536                *bh = tmp;
 537
 538        return rc;
 539}
 540
 541static inline const char *ocfs2_xattr_prefix(int name_index)
 542{
 543        const struct xattr_handler *handler = NULL;
 544
 545        if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
 546                handler = ocfs2_xattr_handler_map[name_index];
 547        return handler ? xattr_prefix(handler) : NULL;
 548}
 549
 550static u32 ocfs2_xattr_name_hash(struct inode *inode,
 551                                 const char *name,
 552                                 int name_len)
 553{
 554        /* Get hash value of uuid from super block */
 555        u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
 556        int i;
 557
 558        /* hash extended attribute name */
 559        for (i = 0; i < name_len; i++) {
 560                hash = (hash << OCFS2_HASH_SHIFT) ^
 561                       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
 562                       *name++;
 563        }
 564
 565        return hash;
 566}
 567
 568static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
 569{
 570        return namevalue_size(name_len, value_len) +
 571                sizeof(struct ocfs2_xattr_entry);
 572}
 573
 574static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
 575{
 576        return namevalue_size_xi(xi) +
 577                sizeof(struct ocfs2_xattr_entry);
 578}
 579
 580static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
 581{
 582        return namevalue_size_xe(xe) +
 583                sizeof(struct ocfs2_xattr_entry);
 584}
 585
 586int ocfs2_calc_security_init(struct inode *dir,
 587                             struct ocfs2_security_xattr_info *si,
 588                             int *want_clusters,
 589                             int *xattr_credits,
 590                             struct ocfs2_alloc_context **xattr_ac)
 591{
 592        int ret = 0;
 593        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 594        int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 595                                                 si->value_len);
 596
 597        /*
 598         * The max space of security xattr taken inline is
 599         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 600         * So reserve one metadata block for it is ok.
 601         */
 602        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 603            s_size > OCFS2_XATTR_FREE_IN_IBODY) {
 604                ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
 605                if (ret) {
 606                        mlog_errno(ret);
 607                        return ret;
 608                }
 609                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 610        }
 611
 612        /* reserve clusters for xattr value which will be set in B tree*/
 613        if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 614                int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 615                                                            si->value_len);
 616
 617                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 618                                                           new_clusters);
 619                *want_clusters += new_clusters;
 620        }
 621        return ret;
 622}
 623
 624int ocfs2_calc_xattr_init(struct inode *dir,
 625                          struct buffer_head *dir_bh,
 626                          umode_t mode,
 627                          struct ocfs2_security_xattr_info *si,
 628                          int *want_clusters,
 629                          int *xattr_credits,
 630                          int *want_meta)
 631{
 632        int ret = 0;
 633        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 634        int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
 635
 636        if (si->enable)
 637                s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 638                                                     si->value_len);
 639
 640        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
 641                acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
 642                                        OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
 643                                        "", NULL, 0);
 644                if (acl_len > 0) {
 645                        a_size = ocfs2_xattr_entry_real_size(0, acl_len);
 646                        if (S_ISDIR(mode))
 647                                a_size <<= 1;
 648                } else if (acl_len != 0 && acl_len != -ENODATA) {
 649                        mlog_errno(ret);
 650                        return ret;
 651                }
 652        }
 653
 654        if (!(s_size + a_size))
 655                return ret;
 656
 657        /*
 658         * The max space of security xattr taken inline is
 659         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 660         * The max space of acl xattr taken inline is
 661         * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
 662         * when blocksize = 512, may reserve one more cluser for
 663         * xattr bucket, otherwise reserve one metadata block
 664         * for them is ok.
 665         * If this is a new directory with inline data,
 666         * we choose to reserve the entire inline area for
 667         * directory contents and force an external xattr block.
 668         */
 669        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 670            (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 671            (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
 672                *want_meta = *want_meta + 1;
 673                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 674        }
 675
 676        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
 677            (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
 678                *want_clusters += 1;
 679                *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
 680        }
 681
 682        /*
 683         * reserve credits and clusters for xattrs which has large value
 684         * and have to be set outside
 685         */
 686        if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 687                new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 688                                                        si->value_len);
 689                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 690                                                           new_clusters);
 691                *want_clusters += new_clusters;
 692        }
 693        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
 694            acl_len > OCFS2_XATTR_INLINE_SIZE) {
 695                /* for directory, it has DEFAULT and ACCESS two types of acls */
 696                new_clusters = (S_ISDIR(mode) ? 2 : 1) *
 697                                ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
 698                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 699                                                           new_clusters);
 700                *want_clusters += new_clusters;
 701        }
 702
 703        return ret;
 704}
 705
/*
 * Add clusters_to_add clusters to the extent tree of the xattr value
 * described by @vb, using the journal handle and allocation contexts
 * carried in @ctxt.
 *
 * Returns the status of the last step performed; negative values are
 * errors.
 */
static int ocfs2_xattr_extend_allocation(struct inode *inode,
                                         u32 clusters_to_add,
                                         struct ocfs2_xattr_value_buf *vb,
                                         struct ocfs2_xattr_set_ctxt *ctxt)
{
        int status = 0, credits;
        handle_t *handle = ctxt->handle;
        enum ocfs2_alloc_restarted why;
        u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
        struct ocfs2_extent_tree et;

        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

        /* Keep going until every requested cluster has been added. */
        while (clusters_to_add) {
                trace_ocfs2_xattr_extend_allocation(clusters_to_add);

                /* Get journal write access to the value's buffer first. */
                status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
                                       OCFS2_JOURNAL_ACCESS_WRITE);
                if (status < 0) {
                        mlog_errno(status);
                        break;
                }

                /* Remember the size so the progress of this pass can be measured. */
                prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
                status = ocfs2_add_clusters_in_btree(handle,
                                                     &et,
                                                     &logical_start,
                                                     clusters_to_add,
                                                     0,
                                                     ctxt->data_ac,
                                                     ctxt->meta_ac,
                                                     &why);
                /* -EAGAIN does not end the loop; -ENOSPC ends it without a log. */
                if ((status < 0) && (status != -EAGAIN)) {
                        if (status != -ENOSPC)
                                mlog_errno(status);
                        break;
                }

                ocfs2_journal_dirty(handle, vb->vb_bh);

                /* Subtract however many clusters this pass actually added. */
                clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
                                         prev_clusters;

                if (why != RESTART_NONE && clusters_to_add) {
                        /*
                         * We can only fail in case the alloc file doesn't give
                         * up enough clusters.
                         */
                        BUG_ON(why == RESTART_META);

                        /* More work remains: extend the transaction for the next pass. */
                        credits = ocfs2_calc_extend_credits(inode->i_sb,
                                                            &vb->vb_xv->xr_list);
                        status = ocfs2_extend_trans(handle, credits);
                        if (status < 0) {
                                /*
                                 * NOTE(review): the error returned by
                                 * ocfs2_extend_trans() is replaced with
                                 * -ENOMEM here - confirm this is intended.
                                 */
                                status = -ENOMEM;
                                mlog_errno(status);
                                break;
                        }
                }
        }

        return status;
}
 769
 770static int __ocfs2_remove_xattr_range(struct inode *inode,
 771                                      struct ocfs2_xattr_value_buf *vb,
 772                                      u32 cpos, u32 phys_cpos, u32 len,
 773                                      unsigned int ext_flags,
 774                                      struct ocfs2_xattr_set_ctxt *ctxt)
 775{
 776        int ret;
 777        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 778        handle_t *handle = ctxt->handle;
 779        struct ocfs2_extent_tree et;
 780
 781        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 782
 783        ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 784                            OCFS2_JOURNAL_ACCESS_WRITE);
 785        if (ret) {
 786                mlog_errno(ret);
 787                goto out;
 788        }
 789
 790        ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
 791                                  &ctxt->dealloc);
 792        if (ret) {
 793                mlog_errno(ret);
 794                goto out;
 795        }
 796
 797        le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
 798        ocfs2_journal_dirty(handle, vb->vb_bh);
 799
 800        if (ext_flags & OCFS2_EXT_REFCOUNTED)
 801                ret = ocfs2_decrease_refcount(inode, handle,
 802                                        ocfs2_blocks_to_clusters(inode->i_sb,
 803                                                                 phys_blkno),
 804                                        len, ctxt->meta_ac, &ctxt->dealloc, 1);
 805        else
 806                ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
 807                                                  phys_blkno, len);
 808        if (ret)
 809                mlog_errno(ret);
 810
 811out:
 812        return ret;
 813}
 814
 815static int ocfs2_xattr_shrink_size(struct inode *inode,
 816                                   u32 old_clusters,
 817                                   u32 new_clusters,
 818                                   struct ocfs2_xattr_value_buf *vb,
 819                                   struct ocfs2_xattr_set_ctxt *ctxt)
 820{
 821        int ret = 0;
 822        unsigned int ext_flags;
 823        u32 trunc_len, cpos, phys_cpos, alloc_size;
 824        u64 block;
 825
 826        if (old_clusters <= new_clusters)
 827                return 0;
 828
 829        cpos = new_clusters;
 830        trunc_len = old_clusters - new_clusters;
 831        while (trunc_len) {
 832                ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 833                                               &alloc_size,
 834                                               &vb->vb_xv->xr_list, &ext_flags);
 835                if (ret) {
 836                        mlog_errno(ret);
 837                        goto out;
 838                }
 839
 840                if (alloc_size > trunc_len)
 841                        alloc_size = trunc_len;
 842
 843                ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
 844                                                 phys_cpos, alloc_size,
 845                                                 ext_flags, ctxt);
 846                if (ret) {
 847                        mlog_errno(ret);
 848                        goto out;
 849                }
 850
 851                block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 852                ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
 853                                                       block, alloc_size);
 854                cpos += alloc_size;
 855                trunc_len -= alloc_size;
 856        }
 857
 858out:
 859        return ret;
 860}
 861
 862static int ocfs2_xattr_value_truncate(struct inode *inode,
 863                                      struct ocfs2_xattr_value_buf *vb,
 864                                      int len,
 865                                      struct ocfs2_xattr_set_ctxt *ctxt)
 866{
 867        int ret;
 868        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
 869        u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 870
 871        if (new_clusters == old_clusters)
 872                return 0;
 873
 874        if (new_clusters > old_clusters)
 875                ret = ocfs2_xattr_extend_allocation(inode,
 876                                                    new_clusters - old_clusters,
 877                                                    vb, ctxt);
 878        else
 879                ret = ocfs2_xattr_shrink_size(inode,
 880                                              old_clusters, new_clusters,
 881                                              vb, ctxt);
 882
 883        return ret;
 884}
 885
 886static int ocfs2_xattr_list_entry(struct super_block *sb,
 887                                  char *buffer, size_t size,
 888                                  size_t *result, int type,
 889                                  const char *name, int name_len)
 890{
 891        char *p = buffer + *result;
 892        const char *prefix;
 893        int prefix_len;
 894        int total_len;
 895
 896        switch(type) {
 897        case OCFS2_XATTR_INDEX_USER:
 898                if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 899                        return 0;
 900                break;
 901
 902        case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
 903        case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
 904                if (!(sb->s_flags & MS_POSIXACL))
 905                        return 0;
 906                break;
 907
 908        case OCFS2_XATTR_INDEX_TRUSTED:
 909                if (!capable(CAP_SYS_ADMIN))
 910                        return 0;
 911                break;
 912        }
 913
 914        prefix = ocfs2_xattr_prefix(type);
 915        if (!prefix)
 916                return 0;
 917        prefix_len = strlen(prefix);
 918        total_len = prefix_len + name_len + 1;
 919        *result += total_len;
 920
 921        /* we are just looking for how big our buffer needs to be */
 922        if (!size)
 923                return 0;
 924
 925        if (*result > size)
 926                return -ERANGE;
 927
 928        memcpy(p, prefix, prefix_len);
 929        memcpy(p + prefix_len, name, name_len);
 930        p[prefix_len + name_len] = '\0';
 931
 932        return 0;
 933}
 934
 935static int ocfs2_xattr_list_entries(struct inode *inode,
 936                                    struct ocfs2_xattr_header *header,
 937                                    char *buffer, size_t buffer_size)
 938{
 939        size_t result = 0;
 940        int i, type, ret;
 941        const char *name;
 942
 943        for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
 944                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 945                type = ocfs2_xattr_get_type(entry);
 946                name = (const char *)header +
 947                        le16_to_cpu(entry->xe_name_offset);
 948
 949                ret = ocfs2_xattr_list_entry(inode->i_sb,
 950                                             buffer, buffer_size,
 951                                             &result, type, name,
 952                                             entry->xe_name_len);
 953                if (ret)
 954                        return ret;
 955        }
 956
 957        return result;
 958}
 959
 960int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
 961                                         struct ocfs2_dinode *di)
 962{
 963        struct ocfs2_xattr_header *xh;
 964        int i;
 965
 966        xh = (struct ocfs2_xattr_header *)
 967                 ((void *)di + inode->i_sb->s_blocksize -
 968                 le16_to_cpu(di->i_xattr_inline_size));
 969
 970        for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
 971                if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
 972                        return 1;
 973
 974        return 0;
 975}
 976
 977static int ocfs2_xattr_ibody_list(struct inode *inode,
 978                                  struct ocfs2_dinode *di,
 979                                  char *buffer,
 980                                  size_t buffer_size)
 981{
 982        struct ocfs2_xattr_header *header = NULL;
 983        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 984        int ret = 0;
 985
 986        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
 987                return ret;
 988
 989        header = (struct ocfs2_xattr_header *)
 990                 ((void *)di + inode->i_sb->s_blocksize -
 991                 le16_to_cpu(di->i_xattr_inline_size));
 992
 993        ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
 994
 995        return ret;
 996}
 997
 998static int ocfs2_xattr_block_list(struct inode *inode,
 999                                  struct ocfs2_dinode *di,
1000                                  char *buffer,
1001                                  size_t buffer_size)
1002{
1003        struct buffer_head *blk_bh = NULL;
1004        struct ocfs2_xattr_block *xb;
1005        int ret = 0;
1006
1007        if (!di->i_xattr_loc)
1008                return ret;
1009
1010        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
1011                                     &blk_bh);
1012        if (ret < 0) {
1013                mlog_errno(ret);
1014                return ret;
1015        }
1016
1017        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1018        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1019                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
1020                ret = ocfs2_xattr_list_entries(inode, header,
1021                                               buffer, buffer_size);
1022        } else
1023                ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1024                                                   buffer, buffer_size);
1025
1026        brelse(blk_bh);
1027
1028        return ret;
1029}
1030
1031ssize_t ocfs2_listxattr(struct dentry *dentry,
1032                        char *buffer,
1033                        size_t size)
1034{
1035        int ret = 0, i_ret = 0, b_ret = 0;
1036        struct buffer_head *di_bh = NULL;
1037        struct ocfs2_dinode *di = NULL;
1038        struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
1039
1040        if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1041                return -EOPNOTSUPP;
1042
1043        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1044                return ret;
1045
1046        ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
1047        if (ret < 0) {
1048                mlog_errno(ret);
1049                return ret;
1050        }
1051
1052        di = (struct ocfs2_dinode *)di_bh->b_data;
1053
1054        down_read(&oi->ip_xattr_sem);
1055        i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
1056        if (i_ret < 0)
1057                b_ret = 0;
1058        else {
1059                if (buffer) {
1060                        buffer += i_ret;
1061                        size -= i_ret;
1062                }
1063                b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
1064                                               buffer, size);
1065                if (b_ret < 0)
1066                        i_ret = 0;
1067        }
1068        up_read(&oi->ip_xattr_sem);
1069        ocfs2_inode_unlock(d_inode(dentry), 0);
1070
1071        brelse(di_bh);
1072
1073        return i_ret + b_ret;
1074}
1075
1076static int ocfs2_xattr_find_entry(int name_index,
1077                                  const char *name,
1078                                  struct ocfs2_xattr_search *xs)
1079{
1080        struct ocfs2_xattr_entry *entry;
1081        size_t name_len;
1082        int i, cmp = 1;
1083
1084        if (name == NULL)
1085                return -EINVAL;
1086
1087        name_len = strlen(name);
1088        entry = xs->here;
1089        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1090                cmp = name_index - ocfs2_xattr_get_type(entry);
1091                if (!cmp)
1092                        cmp = name_len - entry->xe_name_len;
1093                if (!cmp)
1094                        cmp = memcmp(name, (xs->base +
1095                                     le16_to_cpu(entry->xe_name_offset)),
1096                                     name_len);
1097                if (cmp == 0)
1098                        break;
1099                entry += 1;
1100        }
1101        xs->here = entry;
1102
1103        return cmp ? -ENODATA : 0;
1104}
1105
1106static int ocfs2_xattr_get_value_outside(struct inode *inode,
1107                                         struct ocfs2_xattr_value_root *xv,
1108                                         void *buffer,
1109                                         size_t len)
1110{
1111        u32 cpos, p_cluster, num_clusters, bpc, clusters;
1112        u64 blkno;
1113        int i, ret = 0;
1114        size_t cplen, blocksize;
1115        struct buffer_head *bh = NULL;
1116        struct ocfs2_extent_list *el;
1117
1118        el = &xv->xr_list;
1119        clusters = le32_to_cpu(xv->xr_clusters);
1120        bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1121        blocksize = inode->i_sb->s_blocksize;
1122
1123        cpos = 0;
1124        while (cpos < clusters) {
1125                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1126                                               &num_clusters, el, NULL);
1127                if (ret) {
1128                        mlog_errno(ret);
1129                        goto out;
1130                }
1131
1132                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1133                /* Copy ocfs2_xattr_value */
1134                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1135                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1136                                               &bh, NULL);
1137                        if (ret) {
1138                                mlog_errno(ret);
1139                                goto out;
1140                        }
1141
1142                        cplen = len >= blocksize ? blocksize : len;
1143                        memcpy(buffer, bh->b_data, cplen);
1144                        len -= cplen;
1145                        buffer += cplen;
1146
1147                        brelse(bh);
1148                        bh = NULL;
1149                        if (len == 0)
1150                                break;
1151                }
1152                cpos += num_clusters;
1153        }
1154out:
1155        return ret;
1156}
1157
1158static int ocfs2_xattr_ibody_get(struct inode *inode,
1159                                 int name_index,
1160                                 const char *name,
1161                                 void *buffer,
1162                                 size_t buffer_size,
1163                                 struct ocfs2_xattr_search *xs)
1164{
1165        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1166        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1167        struct ocfs2_xattr_value_root *xv;
1168        size_t size;
1169        int ret = 0;
1170
1171        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1172                return -ENODATA;
1173
1174        xs->end = (void *)di + inode->i_sb->s_blocksize;
1175        xs->header = (struct ocfs2_xattr_header *)
1176                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1177        xs->base = (void *)xs->header;
1178        xs->here = xs->header->xh_entries;
1179
1180        ret = ocfs2_xattr_find_entry(name_index, name, xs);
1181        if (ret)
1182                return ret;
1183        size = le64_to_cpu(xs->here->xe_value_size);
1184        if (buffer) {
1185                if (size > buffer_size)
1186                        return -ERANGE;
1187                if (ocfs2_xattr_is_local(xs->here)) {
1188                        memcpy(buffer, (void *)xs->base +
1189                               le16_to_cpu(xs->here->xe_name_offset) +
1190                               OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1191                } else {
1192                        xv = (struct ocfs2_xattr_value_root *)
1193                                (xs->base + le16_to_cpu(
1194                                 xs->here->xe_name_offset) +
1195                                OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1196                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1197                                                            buffer, size);
1198                        if (ret < 0) {
1199                                mlog_errno(ret);
1200                                return ret;
1201                        }
1202                }
1203        }
1204
1205        return size;
1206}
1207
1208static int ocfs2_xattr_block_get(struct inode *inode,
1209                                 int name_index,
1210                                 const char *name,
1211                                 void *buffer,
1212                                 size_t buffer_size,
1213                                 struct ocfs2_xattr_search *xs)
1214{
1215        struct ocfs2_xattr_block *xb;
1216        struct ocfs2_xattr_value_root *xv;
1217        size_t size;
1218        int ret = -ENODATA, name_offset, name_len, i;
1219        int uninitialized_var(block_off);
1220
1221        xs->bucket = ocfs2_xattr_bucket_new(inode);
1222        if (!xs->bucket) {
1223                ret = -ENOMEM;
1224                mlog_errno(ret);
1225                goto cleanup;
1226        }
1227
1228        ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1229        if (ret) {
1230                mlog_errno(ret);
1231                goto cleanup;
1232        }
1233
1234        if (xs->not_found) {
1235                ret = -ENODATA;
1236                goto cleanup;
1237        }
1238
1239        xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1240        size = le64_to_cpu(xs->here->xe_value_size);
1241        if (buffer) {
1242                ret = -ERANGE;
1243                if (size > buffer_size)
1244                        goto cleanup;
1245
1246                name_offset = le16_to_cpu(xs->here->xe_name_offset);
1247                name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1248                i = xs->here - xs->header->xh_entries;
1249
1250                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1251                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1252                                                                bucket_xh(xs->bucket),
1253                                                                i,
1254                                                                &block_off,
1255                                                                &name_offset);
1256                        if (ret) {
1257                                mlog_errno(ret);
1258                                goto cleanup;
1259                        }
1260                        xs->base = bucket_block(xs->bucket, block_off);
1261                }
1262                if (ocfs2_xattr_is_local(xs->here)) {
1263                        memcpy(buffer, (void *)xs->base +
1264                               name_offset + name_len, size);
1265                } else {
1266                        xv = (struct ocfs2_xattr_value_root *)
1267                                (xs->base + name_offset + name_len);
1268                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1269                                                            buffer, size);
1270                        if (ret < 0) {
1271                                mlog_errno(ret);
1272                                goto cleanup;
1273                        }
1274                }
1275        }
1276        ret = size;
1277cleanup:
1278        ocfs2_xattr_bucket_free(xs->bucket);
1279
1280        brelse(xs->xattr_bh);
1281        xs->xattr_bh = NULL;
1282        return ret;
1283}
1284
1285int ocfs2_xattr_get_nolock(struct inode *inode,
1286                           struct buffer_head *di_bh,
1287                           int name_index,
1288                           const char *name,
1289                           void *buffer,
1290                           size_t buffer_size)
1291{
1292        int ret;
1293        struct ocfs2_dinode *di = NULL;
1294        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1295        struct ocfs2_xattr_search xis = {
1296                .not_found = -ENODATA,
1297        };
1298        struct ocfs2_xattr_search xbs = {
1299                .not_found = -ENODATA,
1300        };
1301
1302        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1303                return -EOPNOTSUPP;
1304
1305        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1306                return -ENODATA;
1307
1308        xis.inode_bh = xbs.inode_bh = di_bh;
1309        di = (struct ocfs2_dinode *)di_bh->b_data;
1310
1311        ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1312                                    buffer_size, &xis);
1313        if (ret == -ENODATA && di->i_xattr_loc)
1314                ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1315                                            buffer_size, &xbs);
1316
1317        return ret;
1318}
1319
1320/* ocfs2_xattr_get()
1321 *
1322 * Copy an extended attribute into the buffer provided.
1323 * Buffer is NULL to compute the size of buffer required.
1324 */
1325static int ocfs2_xattr_get(struct inode *inode,
1326                           int name_index,
1327                           const char *name,
1328                           void *buffer,
1329                           size_t buffer_size)
1330{
1331        int ret;
1332        struct buffer_head *di_bh = NULL;
1333
1334        ret = ocfs2_inode_lock(inode, &di_bh, 0);
1335        if (ret < 0) {
1336                mlog_errno(ret);
1337                return ret;
1338        }
1339        down_read(&OCFS2_I(inode)->ip_xattr_sem);
1340        ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1341                                     name, buffer, buffer_size);
1342        up_read(&OCFS2_I(inode)->ip_xattr_sem);
1343
1344        ocfs2_inode_unlock(inode, 0);
1345
1346        brelse(di_bh);
1347
1348        return ret;
1349}
1350
1351static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1352                                           handle_t *handle,
1353                                           struct ocfs2_xattr_value_buf *vb,
1354                                           const void *value,
1355                                           int value_len)
1356{
1357        int ret = 0, i, cp_len;
1358        u16 blocksize = inode->i_sb->s_blocksize;
1359        u32 p_cluster, num_clusters;
1360        u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1361        u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1362        u64 blkno;
1363        struct buffer_head *bh = NULL;
1364        unsigned int ext_flags;
1365        struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1366
1367        BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1368
1369        while (cpos < clusters) {
1370                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1371                                               &num_clusters, &xv->xr_list,
1372                                               &ext_flags);
1373                if (ret) {
1374                        mlog_errno(ret);
1375                        goto out;
1376                }
1377
1378                BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1379
1380                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1381
1382                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1383                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1384                                               &bh, NULL);
1385                        if (ret) {
1386                                mlog_errno(ret);
1387                                goto out;
1388                        }
1389
1390                        ret = ocfs2_journal_access(handle,
1391                                                   INODE_CACHE(inode),
1392                                                   bh,
1393                                                   OCFS2_JOURNAL_ACCESS_WRITE);
1394                        if (ret < 0) {
1395                                mlog_errno(ret);
1396                                goto out;
1397                        }
1398
1399                        cp_len = value_len > blocksize ? blocksize : value_len;
1400                        memcpy(bh->b_data, value, cp_len);
1401                        value_len -= cp_len;
1402                        value += cp_len;
1403                        if (cp_len < blocksize)
1404                                memset(bh->b_data + cp_len, 0,
1405                                       blocksize - cp_len);
1406
1407                        ocfs2_journal_dirty(handle, bh);
1408                        brelse(bh);
1409                        bh = NULL;
1410
1411                        /*
1412                         * XXX: do we need to empty all the following
1413                         * blocks in this cluster?
1414                         */
1415                        if (!value_len)
1416                                break;
1417                }
1418                cpos += num_clusters;
1419        }
1420out:
1421        brelse(bh);
1422
1423        return ret;
1424}
1425
1426static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1427                                       int num_entries)
1428{
1429        int free_space;
1430
1431        if (!needed_space)
1432                return 0;
1433
1434        free_space = free_start -
1435                sizeof(struct ocfs2_xattr_header) -
1436                (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1437                OCFS2_XATTR_HEADER_GAP;
1438        if (free_space < 0)
1439                return -EIO;
1440        if (free_space < needed_space)
1441                return -ENOSPC;
1442
1443        return 0;
1444}
1445
1446static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1447                                   int type)
1448{
1449        return loc->xl_ops->xlo_journal_access(handle, loc, type);
1450}
1451
1452static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1453{
1454        loc->xl_ops->xlo_journal_dirty(handle, loc);
1455}
1456
1457/* Give a pointer into the storage for the given offset */
1458static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1459{
1460        BUG_ON(offset >= loc->xl_size);
1461        return loc->xl_ops->xlo_offset_pointer(loc, offset);
1462}
1463
1464/*
1465 * Wipe the name+value pair and allow the storage to reclaim it.  This
1466 * must be followed by either removal of the entry or a call to
1467 * ocfs2_xa_add_namevalue().
1468 */
1469static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1470{
1471        loc->xl_ops->xlo_wipe_namevalue(loc);
1472}
1473
1474/*
1475 * Find lowest offset to a name+value pair.  This is the start of our
1476 * downward-growing free space.
1477 */
1478static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1479{
1480        return loc->xl_ops->xlo_get_free_start(loc);
1481}
1482
1483/* Can we reuse loc->xl_entry for xi? */
1484static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1485                                    struct ocfs2_xattr_info *xi)
1486{
1487        return loc->xl_ops->xlo_can_reuse(loc, xi);
1488}
1489
1490/* How much free space is needed to set the new value */
1491static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1492                                struct ocfs2_xattr_info *xi)
1493{
1494        return loc->xl_ops->xlo_check_space(loc, xi);
1495}
1496
1497static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1498{
1499        loc->xl_ops->xlo_add_entry(loc, name_hash);
1500        loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1501        /*
1502         * We can't leave the new entry's xe_name_offset at zero or
1503         * add_namevalue() will go nuts.  We set it to the size of our
1504         * storage so that it can never be less than any other entry.
1505         */
1506        loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1507}
1508
1509static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1510                                   struct ocfs2_xattr_info *xi)
1511{
1512        int size = namevalue_size_xi(xi);
1513        int nameval_offset;
1514        char *nameval_buf;
1515
1516        loc->xl_ops->xlo_add_namevalue(loc, size);
1517        loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1518        loc->xl_entry->xe_name_len = xi->xi_name_len;
1519        ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1520        ocfs2_xattr_set_local(loc->xl_entry,
1521                              xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1522
1523        nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1524        nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1525        memset(nameval_buf, 0, size);
1526        memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1527}
1528
1529static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1530                                    struct ocfs2_xattr_value_buf *vb)
1531{
1532        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1533        int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1534
1535        /* Value bufs are for value trees */
1536        BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1537        BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1538               (name_size + OCFS2_XATTR_ROOT_SIZE));
1539
1540        loc->xl_ops->xlo_fill_value_buf(loc, vb);
1541        vb->vb_xv =
1542                (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1543                                                        nameval_offset +
1544                                                        name_size);
1545}
1546
1547static int ocfs2_xa_block_journal_access(handle_t *handle,
1548                                         struct ocfs2_xa_loc *loc, int type)
1549{
1550        struct buffer_head *bh = loc->xl_storage;
1551        ocfs2_journal_access_func access;
1552
1553        if (loc->xl_size == (bh->b_size -
1554                             offsetof(struct ocfs2_xattr_block,
1555                                      xb_attrs.xb_header)))
1556                access = ocfs2_journal_access_xb;
1557        else
1558                access = ocfs2_journal_access_di;
1559        return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1560}
1561
1562static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1563                                         struct ocfs2_xa_loc *loc)
1564{
1565        struct buffer_head *bh = loc->xl_storage;
1566
1567        ocfs2_journal_dirty(handle, bh);
1568}
1569
1570static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1571                                           int offset)
1572{
1573        return (char *)loc->xl_header + offset;
1574}
1575
1576static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1577                                    struct ocfs2_xattr_info *xi)
1578{
1579        /*
1580         * Block storage is strict.  If the sizes aren't exact, we will
1581         * remove the old one and reinsert the new.
1582         */
1583        return namevalue_size_xe(loc->xl_entry) ==
1584                namevalue_size_xi(xi);
1585}
1586
1587static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1588{
1589        struct ocfs2_xattr_header *xh = loc->xl_header;
1590        int i, count = le16_to_cpu(xh->xh_count);
1591        int offset, free_start = loc->xl_size;
1592
1593        for (i = 0; i < count; i++) {
1594                offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1595                if (offset < free_start)
1596                        free_start = offset;
1597        }
1598
1599        return free_start;
1600}
1601
1602static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1603                                      struct ocfs2_xattr_info *xi)
1604{
1605        int count = le16_to_cpu(loc->xl_header->xh_count);
1606        int free_start = ocfs2_xa_get_free_start(loc);
1607        int needed_space = ocfs2_xi_entry_usage(xi);
1608
1609        /*
1610         * Block storage will reclaim the original entry before inserting
1611         * the new value, so we only need the difference.  If the new
1612         * entry is smaller than the old one, we don't need anything.
1613         */
1614        if (loc->xl_entry) {
1615                /* Don't need space if we're reusing! */
1616                if (ocfs2_xa_can_reuse_entry(loc, xi))
1617                        needed_space = 0;
1618                else
1619                        needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1620        }
1621        if (needed_space < 0)
1622                needed_space = 0;
1623        return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1624}
1625
1626/*
1627 * Block storage for xattrs keeps the name+value pairs compacted.  When
1628 * we remove one, we have to shift any that preceded it towards the end.
1629 */
1630static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1631{
1632        int i, offset;
1633        int namevalue_offset, first_namevalue_offset, namevalue_size;
1634        struct ocfs2_xattr_entry *entry = loc->xl_entry;
1635        struct ocfs2_xattr_header *xh = loc->xl_header;
1636        int count = le16_to_cpu(xh->xh_count);
1637
1638        namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1639        namevalue_size = namevalue_size_xe(entry);
1640        first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1641
1642        /* Shift the name+value pairs */
1643        memmove((char *)xh + first_namevalue_offset + namevalue_size,
1644                (char *)xh + first_namevalue_offset,
1645                namevalue_offset - first_namevalue_offset);
1646        memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1647
1648        /* Now tell xh->xh_entries about it */
1649        for (i = 0; i < count; i++) {
1650                offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1651                if (offset <= namevalue_offset)
1652                        le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1653                                     namevalue_size);
1654        }
1655
1656        /*
1657         * Note that we don't update xh_free_start or xh_name_value_len
1658         * because they're not used in block-stored xattrs.
1659         */
1660}
1661
1662static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1663{
1664        int count = le16_to_cpu(loc->xl_header->xh_count);
1665        loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1666        le16_add_cpu(&loc->xl_header->xh_count, 1);
1667        memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1668}
1669
1670static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1671{
1672        int free_start = ocfs2_xa_get_free_start(loc);
1673
1674        loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1675}
1676
1677static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1678                                          struct ocfs2_xattr_value_buf *vb)
1679{
1680        struct buffer_head *bh = loc->xl_storage;
1681
1682        if (loc->xl_size == (bh->b_size -
1683                             offsetof(struct ocfs2_xattr_block,
1684                                      xb_attrs.xb_header)))
1685                vb->vb_access = ocfs2_journal_access_xb;
1686        else
1687                vb->vb_access = ocfs2_journal_access_di;
1688        vb->vb_bh = bh;
1689}
1690
1691/*
1692 * Operations for xattrs stored in blocks.  This includes inline inode
1693 * storage and unindexed ocfs2_xattr_blocks.
1694 */
1695static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1696        .xlo_journal_access     = ocfs2_xa_block_journal_access,
1697        .xlo_journal_dirty      = ocfs2_xa_block_journal_dirty,
1698        .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1699        .xlo_check_space        = ocfs2_xa_block_check_space,
1700        .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1701        .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1702        .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1703        .xlo_add_entry          = ocfs2_xa_block_add_entry,
1704        .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1705        .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1706};
1707
1708static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1709                                          struct ocfs2_xa_loc *loc, int type)
1710{
1711        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1712
1713        return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1714}
1715
1716static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1717                                          struct ocfs2_xa_loc *loc)
1718{
1719        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1720
1721        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1722}
1723
1724static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1725                                            int offset)
1726{
1727        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1728        int block, block_offset;
1729
1730        /* The header is at the front of the bucket */
1731        block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1732        block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1733
1734        return bucket_block(bucket, block) + block_offset;
1735}
1736
1737static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1738                                     struct ocfs2_xattr_info *xi)
1739{
1740        return namevalue_size_xe(loc->xl_entry) >=
1741                namevalue_size_xi(xi);
1742}
1743
1744static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1745{
1746        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1747        return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1748}
1749
1750static int ocfs2_bucket_align_free_start(struct super_block *sb,
1751                                         int free_start, int size)
1752{
1753        /*
1754         * We need to make sure that the name+value pair fits within
1755         * one block.
1756         */
1757        if (((free_start - size) >> sb->s_blocksize_bits) !=
1758            ((free_start - 1) >> sb->s_blocksize_bits))
1759                free_start -= free_start % sb->s_blocksize;
1760
1761        return free_start;
1762}
1763
1764static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1765                                       struct ocfs2_xattr_info *xi)
1766{
1767        int rc;
1768        int count = le16_to_cpu(loc->xl_header->xh_count);
1769        int free_start = ocfs2_xa_get_free_start(loc);
1770        int needed_space = ocfs2_xi_entry_usage(xi);
1771        int size = namevalue_size_xi(xi);
1772        struct super_block *sb = loc->xl_inode->i_sb;
1773
1774        /*
1775         * Bucket storage does not reclaim name+value pairs it cannot
1776         * reuse.  They live as holes until the bucket fills, and then
1777         * the bucket is defragmented.  However, the bucket can reclaim
1778         * the ocfs2_xattr_entry.
1779         */
1780        if (loc->xl_entry) {
1781                /* Don't need space if we're reusing! */
1782                if (ocfs2_xa_can_reuse_entry(loc, xi))
1783                        needed_space = 0;
1784                else
1785                        needed_space -= sizeof(struct ocfs2_xattr_entry);
1786        }
1787        BUG_ON(needed_space < 0);
1788
1789        if (free_start < size) {
1790                if (needed_space)
1791                        return -ENOSPC;
1792        } else {
1793                /*
1794                 * First we check if it would fit in the first place.
1795                 * Below, we align the free start to a block.  This may
1796                 * slide us below the minimum gap.  By checking unaligned
1797                 * first, we avoid that error.
1798                 */
1799                rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1800                                                 count);
1801                if (rc)
1802                        return rc;
1803                free_start = ocfs2_bucket_align_free_start(sb, free_start,
1804                                                           size);
1805        }
1806        return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1807}
1808
1809static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1810{
1811        le16_add_cpu(&loc->xl_header->xh_name_value_len,
1812                     -namevalue_size_xe(loc->xl_entry));
1813}
1814
1815static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1816{
1817        struct ocfs2_xattr_header *xh = loc->xl_header;
1818        int count = le16_to_cpu(xh->xh_count);
1819        int low = 0, high = count - 1, tmp;
1820        struct ocfs2_xattr_entry *tmp_xe;
1821
1822        /*
1823         * We keep buckets sorted by name_hash, so we need to find
1824         * our insert place.
1825         */
1826        while (low <= high && count) {
1827                tmp = (low + high) / 2;
1828                tmp_xe = &xh->xh_entries[tmp];
1829
1830                if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1831                        low = tmp + 1;
1832                else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1833                        high = tmp - 1;
1834                else {
1835                        low = tmp;
1836                        break;
1837                }
1838        }
1839
1840        if (low != count)
1841                memmove(&xh->xh_entries[low + 1],
1842                        &xh->xh_entries[low],
1843                        ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1844
1845        le16_add_cpu(&xh->xh_count, 1);
1846        loc->xl_entry = &xh->xh_entries[low];
1847        memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1848}
1849
1850static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1851{
1852        int free_start = ocfs2_xa_get_free_start(loc);
1853        struct ocfs2_xattr_header *xh = loc->xl_header;
1854        struct super_block *sb = loc->xl_inode->i_sb;
1855        int nameval_offset;
1856
1857        free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1858        nameval_offset = free_start - size;
1859        loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1860        xh->xh_free_start = cpu_to_le16(nameval_offset);
1861        le16_add_cpu(&xh->xh_name_value_len, size);
1862
1863}
1864
1865static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1866                                           struct ocfs2_xattr_value_buf *vb)
1867{
1868        struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1869        struct super_block *sb = loc->xl_inode->i_sb;
1870        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1871        int size = namevalue_size_xe(loc->xl_entry);
1872        int block_offset = nameval_offset >> sb->s_blocksize_bits;
1873
1874        /* Values are not allowed to straddle block boundaries */
1875        BUG_ON(block_offset !=
1876               ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1877        /* We expect the bucket to be filled in */
1878        BUG_ON(!bucket->bu_bhs[block_offset]);
1879
1880        vb->vb_access = ocfs2_journal_access;
1881        vb->vb_bh = bucket->bu_bhs[block_offset];
1882}
1883
1884/* Operations for xattrs stored in buckets. */
1885static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1886        .xlo_journal_access     = ocfs2_xa_bucket_journal_access,
1887        .xlo_journal_dirty      = ocfs2_xa_bucket_journal_dirty,
1888        .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1889        .xlo_check_space        = ocfs2_xa_bucket_check_space,
1890        .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1891        .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1892        .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1893        .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1894        .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1895        .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1896};
1897
1898static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1899{
1900        struct ocfs2_xattr_value_buf vb;
1901
1902        if (ocfs2_xattr_is_local(loc->xl_entry))
1903                return 0;
1904
1905        ocfs2_xa_fill_value_buf(loc, &vb);
1906        return le32_to_cpu(vb.vb_xv->xr_clusters);
1907}
1908
1909static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1910                                   struct ocfs2_xattr_set_ctxt *ctxt)
1911{
1912        int trunc_rc, access_rc;
1913        struct ocfs2_xattr_value_buf vb;
1914
1915        ocfs2_xa_fill_value_buf(loc, &vb);
1916        trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1917                                              ctxt);
1918
1919        /*
1920         * The caller of ocfs2_xa_value_truncate() has already called
1921         * ocfs2_xa_journal_access on the loc.  However, The truncate code
1922         * calls ocfs2_extend_trans().  This may commit the previous
1923         * transaction and open a new one.  If this is a bucket, truncate
1924         * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1925         * the caller is expecting to dirty the entire bucket.  So we must
1926         * reset the journal work.  We do this even if truncate has failed,
1927         * as it could have failed after committing the extend.
1928         */
1929        access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1930                                            OCFS2_JOURNAL_ACCESS_WRITE);
1931
1932        /* Errors in truncate take precedence */
1933        return trunc_rc ? trunc_rc : access_rc;
1934}
1935
1936static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1937{
1938        int index, count;
1939        struct ocfs2_xattr_header *xh = loc->xl_header;
1940        struct ocfs2_xattr_entry *entry = loc->xl_entry;
1941
1942        ocfs2_xa_wipe_namevalue(loc);
1943        loc->xl_entry = NULL;
1944
1945        le16_add_cpu(&xh->xh_count, -1);
1946        count = le16_to_cpu(xh->xh_count);
1947
1948        /*
1949         * Only zero out the entry if there are more remaining.  This is
1950         * important for an empty bucket, as it keeps track of the
1951         * bucket's hash value.  It doesn't hurt empty block storage.
1952         */
1953        if (count) {
1954                index = ((char *)entry - (char *)&xh->xh_entries) /
1955                        sizeof(struct ocfs2_xattr_entry);
1956                memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1957                        (count - index) * sizeof(struct ocfs2_xattr_entry));
1958                memset(&xh->xh_entries[count], 0,
1959                       sizeof(struct ocfs2_xattr_entry));
1960        }
1961}
1962
1963/*
1964 * If we have a problem adjusting the size of an external value during
1965 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1966 * in an intermediate state.  For example, the value may be partially
1967 * truncated.
1968 *
1969 * If the value tree hasn't changed, the extend/truncate went nowhere.
1970 * We have nothing to do.  The caller can treat it as a straight error.
1971 *
1972 * If the value tree got partially truncated, we now have a corrupted
1973 * extended attribute.  We're going to wipe its entry and leak the
1974 * clusters.  Better to leak some storage than leave a corrupt entry.
1975 *
1976 * If the value tree grew, it obviously didn't grow enough for the
1977 * new entry.  We're not going to try and reclaim those clusters either.
1978 * If there was already an external value there (orig_clusters != 0),
1979 * the new clusters are attached safely and we can just leave the old
1980 * value in place.  If there was no external value there, we remove
1981 * the entry.
1982 *
1983 * This way, the xattr block we store in the journal will be consistent.
1984 * If the size change broke because of the journal, no changes will hit
1985 * disk anyway.
1986 */
1987static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
1988                                            const char *what,
1989                                            unsigned int orig_clusters)
1990{
1991        unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
1992        char *nameval_buf = ocfs2_xa_offset_pointer(loc,
1993                                le16_to_cpu(loc->xl_entry->xe_name_offset));
1994
1995        if (new_clusters < orig_clusters) {
1996                mlog(ML_ERROR,
1997                     "Partial truncate while %s xattr %.*s.  Leaking "
1998                     "%u clusters and removing the entry\n",
1999                     what, loc->xl_entry->xe_name_len, nameval_buf,
2000                     orig_clusters - new_clusters);
2001                ocfs2_xa_remove_entry(loc);
2002        } else if (!orig_clusters) {
2003                mlog(ML_ERROR,
2004                     "Unable to allocate an external value for xattr "
2005                     "%.*s safely.  Leaking %u clusters and removing the "
2006                     "entry\n",
2007                     loc->xl_entry->xe_name_len, nameval_buf,
2008                     new_clusters - orig_clusters);
2009                ocfs2_xa_remove_entry(loc);
2010        } else if (new_clusters > orig_clusters)
2011                mlog(ML_ERROR,
2012                     "Unable to grow xattr %.*s safely.  %u new clusters "
2013                     "have been added, but the value will not be "
2014                     "modified\n",
2015                     loc->xl_entry->xe_name_len, nameval_buf,
2016                     new_clusters - orig_clusters);
2017}
2018
2019static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
2020                           struct ocfs2_xattr_set_ctxt *ctxt)
2021{
2022        int rc = 0;
2023        unsigned int orig_clusters;
2024
2025        if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2026                orig_clusters = ocfs2_xa_value_clusters(loc);
2027                rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2028                if (rc) {
2029                        mlog_errno(rc);
2030                        /*
2031                         * Since this is remove, we can return 0 if
2032                         * ocfs2_xa_cleanup_value_truncate() is going to
2033                         * wipe the entry anyway.  So we check the
2034                         * cluster count as well.
2035                         */
2036                        if (orig_clusters != ocfs2_xa_value_clusters(loc))
2037                                rc = 0;
2038                        ocfs2_xa_cleanup_value_truncate(loc, "removing",
2039                                                        orig_clusters);
2040                        if (rc)
2041                                goto out;
2042                }
2043        }
2044
2045        ocfs2_xa_remove_entry(loc);
2046
2047out:
2048        return rc;
2049}
2050
2051static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2052{
2053        int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2054        char *nameval_buf;
2055
2056        nameval_buf = ocfs2_xa_offset_pointer(loc,
2057                                le16_to_cpu(loc->xl_entry->xe_name_offset));
2058        memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2059}
2060
2061/*
2062 * Take an existing entry and make it ready for the new value.  This
2063 * won't allocate space, but it may free space.  It should be ready for
2064 * ocfs2_xa_prepare_entry() to finish the work.
2065 */
2066static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2067                                struct ocfs2_xattr_info *xi,
2068                                struct ocfs2_xattr_set_ctxt *ctxt)
2069{
2070        int rc = 0;
2071        int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2072        unsigned int orig_clusters;
2073        char *nameval_buf;
2074        int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2075        int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2076
2077        BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2078               name_size);
2079
2080        nameval_buf = ocfs2_xa_offset_pointer(loc,
2081                                le16_to_cpu(loc->xl_entry->xe_name_offset));
2082        if (xe_local) {
2083                memset(nameval_buf + name_size, 0,
2084                       namevalue_size_xe(loc->xl_entry) - name_size);
2085                if (!xi_local)
2086                        ocfs2_xa_install_value_root(loc);
2087        } else {
2088                orig_clusters = ocfs2_xa_value_clusters(loc);
2089                if (xi_local) {
2090                        rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2091                        if (rc < 0)
2092                                mlog_errno(rc);
2093                        else
2094                                memset(nameval_buf + name_size, 0,
2095                                       namevalue_size_xe(loc->xl_entry) -
2096                                       name_size);
2097                } else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2098                           xi->xi_value_len) {
2099                        rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2100                                                     ctxt);
2101                        if (rc < 0)
2102                                mlog_errno(rc);
2103                }
2104
2105                if (rc) {
2106                        ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2107                                                        orig_clusters);
2108                        goto out;
2109                }
2110        }
2111
2112        loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2113        ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2114
2115out:
2116        return rc;
2117}
2118
2119/*
2120 * Prepares loc->xl_entry to receive the new xattr.  This includes
2121 * properly setting up the name+value pair region.  If loc->xl_entry
2122 * already exists, it will take care of modifying it appropriately.
2123 *
2124 * Note that this modifies the data.  You did journal_access already,
2125 * right?
2126 */
2127static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2128                                  struct ocfs2_xattr_info *xi,
2129                                  u32 name_hash,
2130                                  struct ocfs2_xattr_set_ctxt *ctxt)
2131{
2132        int rc = 0;
2133        unsigned int orig_clusters;
2134        __le64 orig_value_size = 0;
2135
2136        rc = ocfs2_xa_check_space(loc, xi);
2137        if (rc)
2138                goto out;
2139
2140        if (loc->xl_entry) {
2141                if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2142                        orig_value_size = loc->xl_entry->xe_value_size;
2143                        rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2144                        if (rc)
2145                                goto out;
2146                        goto alloc_value;
2147                }
2148
2149                if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2150                        orig_clusters = ocfs2_xa_value_clusters(loc);
2151                        rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2152                        if (rc) {
2153                                mlog_errno(rc);
2154                                ocfs2_xa_cleanup_value_truncate(loc,
2155                                                                "overwriting",
2156                                                                orig_clusters);
2157                                goto out;
2158                        }
2159                }
2160                ocfs2_xa_wipe_namevalue(loc);
2161        } else
2162                ocfs2_xa_add_entry(loc, name_hash);
2163
2164        /*
2165         * If we get here, we have a blank entry.  Fill it.  We grow our
2166         * name+value pair back from the end.
2167         */
2168        ocfs2_xa_add_namevalue(loc, xi);
2169        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2170                ocfs2_xa_install_value_root(loc);
2171
2172alloc_value:
2173        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2174                orig_clusters = ocfs2_xa_value_clusters(loc);
2175                rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2176                if (rc < 0) {
2177                        ctxt->set_abort = 1;
2178                        ocfs2_xa_cleanup_value_truncate(loc, "growing",
2179                                                        orig_clusters);
2180                        /*
2181                         * If we were growing an existing value,
2182                         * ocfs2_xa_cleanup_value_truncate() won't remove
2183                         * the entry. We need to restore the original value
2184                         * size.
2185                         */
2186                        if (loc->xl_entry) {
2187                                BUG_ON(!orig_value_size);
2188                                loc->xl_entry->xe_value_size = orig_value_size;
2189                        }
2190                        mlog_errno(rc);
2191                }
2192        }
2193
2194out:
2195        return rc;
2196}
2197
2198/*
2199 * Store the value portion of the name+value pair.  This will skip
2200 * values that are stored externally.  Their tree roots were set up
2201 * by ocfs2_xa_prepare_entry().
2202 */
2203static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2204                                struct ocfs2_xattr_info *xi,
2205                                struct ocfs2_xattr_set_ctxt *ctxt)
2206{
2207        int rc = 0;
2208        int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2209        int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2210        char *nameval_buf;
2211        struct ocfs2_xattr_value_buf vb;
2212
2213        nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2214        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2215                ocfs2_xa_fill_value_buf(loc, &vb);
2216                rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2217                                                     ctxt->handle, &vb,
2218                                                     xi->xi_value,
2219                                                     xi->xi_value_len);
2220        } else
2221                memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2222
2223        return rc;
2224}
2225
2226static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2227                        struct ocfs2_xattr_info *xi,
2228                        struct ocfs2_xattr_set_ctxt *ctxt)
2229{
2230        int ret;
2231        u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2232                                              xi->xi_name_len);
2233
2234        ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2235                                      OCFS2_JOURNAL_ACCESS_WRITE);
2236        if (ret) {
2237                mlog_errno(ret);
2238                goto out;
2239        }
2240
2241        /*
2242         * From here on out, everything is going to modify the buffer a
2243         * little.  Errors are going to leave the xattr header in a
2244         * sane state.  Thus, even with errors we dirty the sucker.
2245         */
2246
2247        /* Don't worry, we are never called with !xi_value and !xl_entry */
2248        if (!xi->xi_value) {
2249                ret = ocfs2_xa_remove(loc, ctxt);
2250                goto out_dirty;
2251        }
2252
2253        ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2254        if (ret) {
2255                if (ret != -ENOSPC)
2256                        mlog_errno(ret);
2257                goto out_dirty;
2258        }
2259
2260        ret = ocfs2_xa_store_value(loc, xi, ctxt);
2261        if (ret)
2262                mlog_errno(ret);
2263
2264out_dirty:
2265        ocfs2_xa_journal_dirty(ctxt->handle, loc);
2266
2267out:
2268        return ret;
2269}
2270
2271static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2272                                     struct inode *inode,
2273                                     struct buffer_head *bh,
2274                                     struct ocfs2_xattr_entry *entry)
2275{
2276        struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2277
2278        BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2279
2280        loc->xl_inode = inode;
2281        loc->xl_ops = &ocfs2_xa_block_loc_ops;
2282        loc->xl_storage = bh;
2283        loc->xl_entry = entry;
2284        loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2285        loc->xl_header =
2286                (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2287                                              loc->xl_size);
2288}
2289
2290static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2291                                          struct inode *inode,
2292                                          struct buffer_head *bh,
2293                                          struct ocfs2_xattr_entry *entry)
2294{
2295        struct ocfs2_xattr_block *xb =
2296                (struct ocfs2_xattr_block *)bh->b_data;
2297
2298        BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2299
2300        loc->xl_inode = inode;
2301        loc->xl_ops = &ocfs2_xa_block_loc_ops;
2302        loc->xl_storage = bh;
2303        loc->xl_header = &(xb->xb_attrs.xb_header);
2304        loc->xl_entry = entry;
2305        loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2306                                             xb_attrs.xb_header);
2307}
2308
2309static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2310                                           struct ocfs2_xattr_bucket *bucket,
2311                                           struct ocfs2_xattr_entry *entry)
2312{
2313        loc->xl_inode = bucket->bu_inode;
2314        loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2315        loc->xl_storage = bucket;
2316        loc->xl_header = bucket_xh(bucket);
2317        loc->xl_entry = entry;
2318        loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2319}
2320
2321/*
2322 * In xattr remove, if it is stored outside and refcounted, we may have
2323 * the chance to split the refcount tree. So need the allocators.
2324 */
2325static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2326                                        struct ocfs2_xattr_value_root *xv,
2327                                        struct ocfs2_caching_info *ref_ci,
2328                                        struct buffer_head *ref_root_bh,
2329                                        struct ocfs2_alloc_context **meta_ac,
2330                                        int *ref_credits)
2331{
2332        int ret, meta_add = 0;
2333        u32 p_cluster, num_clusters;
2334        unsigned int ext_flags;
2335
2336        *ref_credits = 0;
2337        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2338                                       &num_clusters,
2339                                       &xv->xr_list,
2340                                       &ext_flags);
2341        if (ret) {
2342                mlog_errno(ret);
2343                goto out;
2344        }
2345
2346        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2347                goto out;
2348
2349        ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2350                                                 ref_root_bh, xv,
2351                                                 &meta_add, ref_credits);
2352        if (ret) {
2353                mlog_errno(ret);
2354                goto out;
2355        }
2356
2357        ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2358                                                meta_add, meta_ac);
2359        if (ret)
2360                mlog_errno(ret);
2361
2362out:
2363        return ret;
2364}
2365
2366static int ocfs2_remove_value_outside(struct inode*inode,
2367                                      struct ocfs2_xattr_value_buf *vb,
2368                                      struct ocfs2_xattr_header *header,
2369                                      struct ocfs2_caching_info *ref_ci,
2370                                      struct buffer_head *ref_root_bh)
2371{
2372        int ret = 0, i, ref_credits;
2373        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2374        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2375        void *val;
2376
2377        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2378
2379        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2380                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2381
2382                if (ocfs2_xattr_is_local(entry))
2383                        continue;
2384
2385                val = (void *)header +
2386                        le16_to_cpu(entry->xe_name_offset);
2387                vb->vb_xv = (struct ocfs2_xattr_value_root *)
2388                        (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2389
2390                ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2391                                                         ref_ci, ref_root_bh,
2392                                                         &ctxt.meta_ac,
2393                                                         &ref_credits);
2394
2395                ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2396                                        ocfs2_remove_extent_credits(osb->sb));
2397                if (IS_ERR(ctxt.handle)) {
2398                        ret = PTR_ERR(ctxt.handle);
2399                        mlog_errno(ret);
2400                        break;
2401                }
2402
2403                ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2404
2405                ocfs2_commit_trans(osb, ctxt.handle);
2406                if (ctxt.meta_ac) {
2407                        ocfs2_free_alloc_context(ctxt.meta_ac);
2408                        ctxt.meta_ac = NULL;
2409                }
2410
2411                if (ret < 0) {
2412                        mlog_errno(ret);
2413                        break;
2414                }
2415
2416        }
2417
2418        if (ctxt.meta_ac)
2419                ocfs2_free_alloc_context(ctxt.meta_ac);
2420        ocfs2_schedule_truncate_log_flush(osb, 1);
2421        ocfs2_run_deallocs(osb, &ctxt.dealloc);
2422        return ret;
2423}
2424
2425static int ocfs2_xattr_ibody_remove(struct inode *inode,
2426                                    struct buffer_head *di_bh,
2427                                    struct ocfs2_caching_info *ref_ci,
2428                                    struct buffer_head *ref_root_bh)
2429{
2430
2431        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2432        struct ocfs2_xattr_header *header;
2433        int ret;
2434        struct ocfs2_xattr_value_buf vb = {
2435                .vb_bh = di_bh,
2436                .vb_access = ocfs2_journal_access_di,
2437        };
2438
2439        header = (struct ocfs2_xattr_header *)
2440                 ((void *)di + inode->i_sb->s_blocksize -
2441                 le16_to_cpu(di->i_xattr_inline_size));
2442
2443        ret = ocfs2_remove_value_outside(inode, &vb, header,
2444                                         ref_ci, ref_root_bh);
2445
2446        return ret;
2447}
2448
/*
 * Argument bundle handed to ocfs2_iterate_xattr_index_block() when removing
 * an indexed xattr block: carries the refcount tree caching info and the
 * refcount root buffer through to the per-cluster callback.
 */
struct ocfs2_rm_xattr_bucket_para {
	struct ocfs2_caching_info	*ref_ci;
	struct buffer_head		*ref_root_bh;
};
2453
2454static int ocfs2_xattr_block_remove(struct inode *inode,
2455                                    struct buffer_head *blk_bh,
2456                                    struct ocfs2_caching_info *ref_ci,
2457                                    struct buffer_head *ref_root_bh)
2458{
2459        struct ocfs2_xattr_block *xb;
2460        int ret = 0;
2461        struct ocfs2_xattr_value_buf vb = {
2462                .vb_bh = blk_bh,
2463                .vb_access = ocfs2_journal_access_xb,
2464        };
2465        struct ocfs2_rm_xattr_bucket_para args = {
2466                .ref_ci = ref_ci,
2467                .ref_root_bh = ref_root_bh,
2468        };
2469
2470        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2471        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2472                struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2473                ret = ocfs2_remove_value_outside(inode, &vb, header,
2474                                                 ref_ci, ref_root_bh);
2475        } else
2476                ret = ocfs2_iterate_xattr_index_block(inode,
2477                                                blk_bh,
2478                                                ocfs2_rm_xattr_cluster,
2479                                                &args);
2480
2481        return ret;
2482}
2483
2484static int ocfs2_xattr_free_block(struct inode *inode,
2485                                  u64 block,
2486                                  struct ocfs2_caching_info *ref_ci,
2487                                  struct buffer_head *ref_root_bh)
2488{
2489        struct inode *xb_alloc_inode;
2490        struct buffer_head *xb_alloc_bh = NULL;
2491        struct buffer_head *blk_bh = NULL;
2492        struct ocfs2_xattr_block *xb;
2493        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2494        handle_t *handle;
2495        int ret = 0;
2496        u64 blk, bg_blkno;
2497        u16 bit;
2498
2499        ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2500        if (ret < 0) {
2501                mlog_errno(ret);
2502                goto out;
2503        }
2504
2505        ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2506        if (ret < 0) {
2507                mlog_errno(ret);
2508                goto out;
2509        }
2510
2511        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2512        blk = le64_to_cpu(xb->xb_blkno);
2513        bit = le16_to_cpu(xb->xb_suballoc_bit);
2514        if (xb->xb_suballoc_loc)
2515                bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2516        else
2517                bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2518
2519        xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2520                                EXTENT_ALLOC_SYSTEM_INODE,
2521                                le16_to_cpu(xb->xb_suballoc_slot));
2522        if (!xb_alloc_inode) {
2523                ret = -ENOMEM;
2524                mlog_errno(ret);
2525                goto out;
2526        }
2527        inode_lock(xb_alloc_inode);
2528
2529        ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2530        if (ret < 0) {
2531                mlog_errno(ret);
2532                goto out_mutex;
2533        }
2534
2535        handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2536        if (IS_ERR(handle)) {
2537                ret = PTR_ERR(handle);
2538                mlog_errno(ret);
2539                goto out_unlock;
2540        }
2541
2542        ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2543                                       bit, bg_blkno, 1);
2544        if (ret < 0)
2545                mlog_errno(ret);
2546
2547        ocfs2_commit_trans(osb, handle);
2548out_unlock:
2549        ocfs2_inode_unlock(xb_alloc_inode, 1);
2550        brelse(xb_alloc_bh);
2551out_mutex:
2552        inode_unlock(xb_alloc_inode);
2553        iput(xb_alloc_inode);
2554out:
2555        brelse(blk_bh);
2556        return ret;
2557}
2558
2559/*
2560 * ocfs2_xattr_remove()
2561 *
2562 * Free extended attribute resources associated with this inode.
2563 */
2564int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2565{
2566        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2567        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2568        struct ocfs2_refcount_tree *ref_tree = NULL;
2569        struct buffer_head *ref_root_bh = NULL;
2570        struct ocfs2_caching_info *ref_ci = NULL;
2571        handle_t *handle;
2572        int ret;
2573
2574        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2575                return 0;
2576
2577        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2578                return 0;
2579
2580        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2581                ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2582                                               le64_to_cpu(di->i_refcount_loc),
2583                                               1, &ref_tree, &ref_root_bh);
2584                if (ret) {
2585                        mlog_errno(ret);
2586                        goto out;
2587                }
2588                ref_ci = &ref_tree->rf_ci;
2589
2590        }
2591
2592        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2593                ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2594                                               ref_ci, ref_root_bh);
2595                if (ret < 0) {
2596                        mlog_errno(ret);
2597                        goto out;
2598                }
2599        }
2600
2601        if (di->i_xattr_loc) {
2602                ret = ocfs2_xattr_free_block(inode,
2603                                             le64_to_cpu(di->i_xattr_loc),
2604                                             ref_ci, ref_root_bh);
2605                if (ret < 0) {
2606                        mlog_errno(ret);
2607                        goto out;
2608                }
2609        }
2610
2611        handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2612                                   OCFS2_INODE_UPDATE_CREDITS);
2613        if (IS_ERR(handle)) {
2614                ret = PTR_ERR(handle);
2615                mlog_errno(ret);
2616                goto out;
2617        }
2618        ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2619                                      OCFS2_JOURNAL_ACCESS_WRITE);
2620        if (ret) {
2621                mlog_errno(ret);
2622                goto out_commit;
2623        }
2624
2625        di->i_xattr_loc = 0;
2626
2627        spin_lock(&oi->ip_lock);
2628        oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2629        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2630        spin_unlock(&oi->ip_lock);
2631        ocfs2_update_inode_fsync_trans(handle, inode, 0);
2632
2633        ocfs2_journal_dirty(handle, di_bh);
2634out_commit:
2635        ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2636out:
2637        if (ref_tree)
2638                ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2639        brelse(ref_root_bh);
2640        return ret;
2641}
2642
2643static int ocfs2_xattr_has_space_inline(struct inode *inode,
2644                                        struct ocfs2_dinode *di)
2645{
2646        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2647        unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2648        int free;
2649
2650        if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2651                return 0;
2652
2653        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2654                struct ocfs2_inline_data *idata = &di->id2.i_data;
2655                free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2656        } else if (ocfs2_inode_is_fast_symlink(inode)) {
2657                free = ocfs2_fast_symlink_chars(inode->i_sb) -
2658                        le64_to_cpu(di->i_size);
2659        } else {
2660                struct ocfs2_extent_list *el = &di->id2.i_list;
2661                free = (le16_to_cpu(el->l_count) -
2662                        le16_to_cpu(el->l_next_free_rec)) *
2663                        sizeof(struct ocfs2_extent_rec);
2664        }
2665        if (free >= xattrsize)
2666                return 1;
2667
2668        return 0;
2669}
2670
2671/*
2672 * ocfs2_xattr_ibody_find()
2673 *
2674 * Find extended attribute in inode block and
2675 * fill search info into struct ocfs2_xattr_search.
2676 */
2677static int ocfs2_xattr_ibody_find(struct inode *inode,
2678                                  int name_index,
2679                                  const char *name,
2680                                  struct ocfs2_xattr_search *xs)
2681{
2682        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2683        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2684        int ret;
2685        int has_space = 0;
2686
2687        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2688                return 0;
2689
2690        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2691                down_read(&oi->ip_alloc_sem);
2692                has_space = ocfs2_xattr_has_space_inline(inode, di);
2693                up_read(&oi->ip_alloc_sem);
2694                if (!has_space)
2695                        return 0;
2696        }
2697
2698        xs->xattr_bh = xs->inode_bh;
2699        xs->end = (void *)di + inode->i_sb->s_blocksize;
2700        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2701                xs->header = (struct ocfs2_xattr_header *)
2702                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2703        else
2704                xs->header = (struct ocfs2_xattr_header *)
2705                        (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2706        xs->base = (void *)xs->header;
2707        xs->here = xs->header->xh_entries;
2708
2709        /* Find the named attribute. */
2710        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2711                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2712                if (ret && ret != -ENODATA)
2713                        return ret;
2714                xs->not_found = ret;
2715        }
2716
2717        return 0;
2718}
2719
2720static int ocfs2_xattr_ibody_init(struct inode *inode,
2721                                  struct buffer_head *di_bh,
2722                                  struct ocfs2_xattr_set_ctxt *ctxt)
2723{
2724        int ret;
2725        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2726        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2727        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2728        unsigned int xattrsize = osb->s_xattr_inline_size;
2729
2730        if (!ocfs2_xattr_has_space_inline(inode, di)) {
2731                ret = -ENOSPC;
2732                goto out;
2733        }
2734
2735        ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2736                                      OCFS2_JOURNAL_ACCESS_WRITE);
2737        if (ret) {
2738                mlog_errno(ret);
2739                goto out;
2740        }
2741
2742        /*
2743         * Adjust extent record count or inline data size
2744         * to reserve space for extended attribute.
2745         */
2746        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2747                struct ocfs2_inline_data *idata = &di->id2.i_data;
2748                le16_add_cpu(&idata->id_count, -xattrsize);
2749        } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2750                struct ocfs2_extent_list *el = &di->id2.i_list;
2751                le16_add_cpu(&el->l_count, -(xattrsize /
2752                                             sizeof(struct ocfs2_extent_rec)));
2753        }
2754        di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2755
2756        spin_lock(&oi->ip_lock);
2757        oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2758        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2759        spin_unlock(&oi->ip_lock);
2760
2761        ocfs2_journal_dirty(ctxt->handle, di_bh);
2762
2763out:
2764        return ret;
2765}
2766
2767/*
2768 * ocfs2_xattr_ibody_set()
2769 *
2770 * Set, replace or remove an extended attribute into inode block.
2771 *
2772 */
2773static int ocfs2_xattr_ibody_set(struct inode *inode,
2774                                 struct ocfs2_xattr_info *xi,
2775                                 struct ocfs2_xattr_search *xs,
2776                                 struct ocfs2_xattr_set_ctxt *ctxt)
2777{
2778        int ret;
2779        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2780        struct ocfs2_xa_loc loc;
2781
2782        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2783                return -ENOSPC;
2784
2785        down_write(&oi->ip_alloc_sem);
2786        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2787                ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2788                if (ret) {
2789                        if (ret != -ENOSPC)
2790                                mlog_errno(ret);
2791                        goto out;
2792                }
2793        }
2794
2795        ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2796                                 xs->not_found ? NULL : xs->here);
2797        ret = ocfs2_xa_set(&loc, xi, ctxt);
2798        if (ret) {
2799                if (ret != -ENOSPC)
2800                        mlog_errno(ret);
2801                goto out;
2802        }
2803        xs->here = loc.xl_entry;
2804
2805out:
2806        up_write(&oi->ip_alloc_sem);
2807
2808        return ret;
2809}
2810
2811/*
2812 * ocfs2_xattr_block_find()
2813 *
2814 * Find extended attribute in external block and
2815 * fill search info into struct ocfs2_xattr_search.
2816 */
2817static int ocfs2_xattr_block_find(struct inode *inode,
2818                                  int name_index,
2819                                  const char *name,
2820                                  struct ocfs2_xattr_search *xs)
2821{
2822        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2823        struct buffer_head *blk_bh = NULL;
2824        struct ocfs2_xattr_block *xb;
2825        int ret = 0;
2826
2827        if (!di->i_xattr_loc)
2828                return ret;
2829
2830        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2831                                     &blk_bh);
2832        if (ret < 0) {
2833                mlog_errno(ret);
2834                return ret;
2835        }
2836
2837        xs->xattr_bh = blk_bh;
2838        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2839
2840        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2841                xs->header = &xb->xb_attrs.xb_header;
2842                xs->base = (void *)xs->header;
2843                xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2844                xs->here = xs->header->xh_entries;
2845
2846                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2847        } else
2848                ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2849                                                   name_index,
2850                                                   name, xs);
2851
2852        if (ret && ret != -ENODATA) {
2853                xs->xattr_bh = NULL;
2854                goto cleanup;
2855        }
2856        xs->not_found = ret;
2857        return 0;
2858cleanup:
2859        brelse(blk_bh);
2860
2861        return ret;
2862}
2863
2864static int ocfs2_create_xattr_block(struct inode *inode,
2865                                    struct buffer_head *inode_bh,
2866                                    struct ocfs2_xattr_set_ctxt *ctxt,
2867                                    int indexed,
2868                                    struct buffer_head **ret_bh)
2869{
2870        int ret;
2871        u16 suballoc_bit_start;
2872        u32 num_got;
2873        u64 suballoc_loc, first_blkno;
2874        struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2875        struct buffer_head *new_bh = NULL;
2876        struct ocfs2_xattr_block *xblk;
2877
2878        ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2879                                      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2880        if (ret < 0) {
2881                mlog_errno(ret);
2882                goto end;
2883        }
2884
2885        ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2886                                   &suballoc_loc, &suballoc_bit_start,
2887                                   &num_got, &first_blkno);
2888        if (ret < 0) {
2889                mlog_errno(ret);
2890                goto end;
2891        }
2892
2893        new_bh = sb_getblk(inode->i_sb, first_blkno);
2894        if (!new_bh) {
2895                ret = -ENOMEM;
2896                mlog_errno(ret);
2897                goto end;
2898        }
2899
2900        ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2901
2902        ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2903                                      new_bh,
2904                                      OCFS2_JOURNAL_ACCESS_CREATE);
2905        if (ret < 0) {
2906                mlog_errno(ret);
2907                goto end;
2908        }
2909
2910        /* Initialize ocfs2_xattr_block */
2911        xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2912        memset(xblk, 0, inode->i_sb->s_blocksize);
2913        strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2914        xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2915        xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2916        xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2917        xblk->xb_fs_generation =
2918                cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2919        xblk->xb_blkno = cpu_to_le64(first_blkno);
2920        if (indexed) {
2921                struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2922                xr->xt_clusters = cpu_to_le32(1);
2923                xr->xt_last_eb_blk = 0;
2924                xr->xt_list.l_tree_depth = 0;
2925                xr->xt_list.l_count = cpu_to_le16(
2926                                        ocfs2_xattr_recs_per_xb(inode->i_sb));
2927                xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2928                xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2929        }
2930        ocfs2_journal_dirty(ctxt->handle, new_bh);
2931
2932        /* Add it to the inode */
2933        di->i_xattr_loc = cpu_to_le64(first_blkno);
2934
2935        spin_lock(&OCFS2_I(inode)->ip_lock);
2936        OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2937        di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2938        spin_unlock(&OCFS2_I(inode)->ip_lock);
2939
2940        ocfs2_journal_dirty(ctxt->handle, inode_bh);
2941
2942        *ret_bh = new_bh;
2943        new_bh = NULL;
2944
2945end:
2946        brelse(new_bh);
2947        return ret;
2948}
2949
2950/*
2951 * ocfs2_xattr_block_set()
2952 *
2953 * Set, replace or remove an extended attribute into external block.
2954 *
2955 */
2956static int ocfs2_xattr_block_set(struct inode *inode,
2957                                 struct ocfs2_xattr_info *xi,
2958                                 struct ocfs2_xattr_search *xs,
2959                                 struct ocfs2_xattr_set_ctxt *ctxt)
2960{
2961        struct buffer_head *new_bh = NULL;
2962        struct ocfs2_xattr_block *xblk = NULL;
2963        int ret;
2964        struct ocfs2_xa_loc loc;
2965
2966        if (!xs->xattr_bh) {
2967                ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2968                                               0, &new_bh);
2969                if (ret) {
2970                        mlog_errno(ret);
2971                        goto end;
2972                }
2973
2974                xs->xattr_bh = new_bh;
2975                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2976                xs->header = &xblk->xb_attrs.xb_header;
2977                xs->base = (void *)xs->header;
2978                xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2979                xs->here = xs->header->xh_entries;
2980        } else
2981                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2982
2983        if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2984                ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2985                                              xs->not_found ? NULL : xs->here);
2986
2987                ret = ocfs2_xa_set(&loc, xi, ctxt);
2988                if (!ret)
2989                        xs->here = loc.xl_entry;
2990                else if ((ret != -ENOSPC) || ctxt->set_abort)
2991                        goto end;
2992                else {
2993                        ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2994                        if (ret)
2995                                goto end;
2996                }
2997        }
2998
2999        if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
3000                ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
3001
3002end:
3003        return ret;
3004}
3005
3006/* Check whether the new xattr can be inserted into the inode. */
3007static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
3008                                       struct ocfs2_xattr_info *xi,
3009                                       struct ocfs2_xattr_search *xs)
3010{
3011        struct ocfs2_xattr_entry *last;
3012        int free, i;
3013        size_t min_offs = xs->end - xs->base;
3014
3015        if (!xs->header)
3016                return 0;
3017
3018        last = xs->header->xh_entries;
3019
3020        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
3021                size_t offs = le16_to_cpu(last->xe_name_offset);
3022                if (offs < min_offs)
3023                        min_offs = offs;
3024                last += 1;
3025        }
3026
3027        free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
3028        if (free < 0)
3029                return 0;
3030
3031        BUG_ON(!xs->not_found);
3032
3033        if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3034                return 1;
3035
3036        return 0;
3037}
3038
3039static int ocfs2_calc_xattr_set_need(struct inode *inode,
3040                                     struct ocfs2_dinode *di,
3041                                     struct ocfs2_xattr_info *xi,
3042                                     struct ocfs2_xattr_search *xis,
3043                                     struct ocfs2_xattr_search *xbs,
3044                                     int *clusters_need,
3045                                     int *meta_need,
3046                                     int *credits_need)
3047{
3048        int ret = 0, old_in_xb = 0;
3049        int clusters_add = 0, meta_add = 0, credits = 0;
3050        struct buffer_head *bh = NULL;
3051        struct ocfs2_xattr_block *xb = NULL;
3052        struct ocfs2_xattr_entry *xe = NULL;
3053        struct ocfs2_xattr_value_root *xv = NULL;
3054        char *base = NULL;
3055        int name_offset, name_len = 0;
3056        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3057                                                    xi->xi_value_len);
3058        u64 value_size;
3059
3060        /*
3061         * Calculate the clusters we need to write.
3062         * No matter whether we replace an old one or add a new one,
3063         * we need this for writing.
3064         */
3065        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3066                credits += new_clusters *
3067                           ocfs2_clusters_to_blocks(inode->i_sb, 1);
3068
3069        if (xis->not_found && xbs->not_found) {
3070                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3071
3072                if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3073                        clusters_add += new_clusters;
3074                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3075                                                        &def_xv.xv.xr_list);
3076                }
3077
3078                goto meta_guess;
3079        }
3080
3081        if (!xis->not_found) {
3082                xe = xis->here;
3083                name_offset = le16_to_cpu(xe->xe_name_offset);
3084                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3085                base = xis->base;
3086                credits += OCFS2_INODE_UPDATE_CREDITS;
3087        } else {
3088                int i, block_off = 0;
3089                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3090                xe = xbs->here;
3091                name_offset = le16_to_cpu(xe->xe_name_offset);
3092                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3093                i = xbs->here - xbs->header->xh_entries;
3094                old_in_xb = 1;
3095
3096                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3097                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3098                                                        bucket_xh(xbs->bucket),
3099                                                        i, &block_off,
3100                                                        &name_offset);
3101                        base = bucket_block(xbs->bucket, block_off);
3102                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3103                } else {
3104                        base = xbs->base;
3105                        credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3106                }
3107        }
3108
3109        /*
3110         * delete a xattr doesn't need metadata and cluster allocation.
3111         * so just calculate the credits and return.
3112         *
3113         * The credits for removing the value tree will be extended
3114         * by ocfs2_remove_extent itself.
3115         */
3116        if (!xi->xi_value) {
3117                if (!ocfs2_xattr_is_local(xe))
3118                        credits += ocfs2_remove_extent_credits(inode->i_sb);
3119
3120                goto out;
3121        }
3122
3123        /* do cluster allocation guess first. */
3124        value_size = le64_to_cpu(xe->xe_value_size);
3125
3126        if (old_in_xb) {
3127                /*
3128                 * In xattr set, we always try to set the xe in inode first,
3129                 * so if it can be inserted into inode successfully, the old
3130                 * one will be removed from the xattr block, and this xattr
3131                 * will be inserted into inode as a new xattr in inode.
3132                 */
3133                if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3134                        clusters_add += new_clusters;
3135                        credits += ocfs2_remove_extent_credits(inode->i_sb) +
3136                                    OCFS2_INODE_UPDATE_CREDITS;
3137                        if (!ocfs2_xattr_is_local(xe))
3138                                credits += ocfs2_calc_extend_credits(
3139                                                        inode->i_sb,
3140                                                        &def_xv.xv.xr_list);
3141                        goto out;
3142                }
3143        }
3144
3145        if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3146                /* the new values will be stored outside. */
3147                u32 old_clusters = 0;
3148
3149                if (!ocfs2_xattr_is_local(xe)) {
3150                        old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3151                                                                 value_size);
3152                        xv = (struct ocfs2_xattr_value_root *)
3153                             (base + name_offset + name_len);
3154                        value_size = OCFS2_XATTR_ROOT_SIZE;
3155                } else
3156                        xv = &def_xv.xv;
3157
3158                if (old_clusters >= new_clusters) {
3159                        credits += ocfs2_remove_extent_credits(inode->i_sb);
3160                        goto out;
3161                } else {
3162                        meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3163                        clusters_add += new_clusters - old_clusters;
3164                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3165                                                             &xv->xr_list);
3166                        if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3167                                goto out;
3168                }
3169        } else {
3170                /*
3171                 * Now the new value will be stored inside. So if the new
3172                 * value is smaller than the size of value root or the old
3173                 * value, we don't need any allocation, otherwise we have
3174                 * to guess metadata allocation.
3175                 */
3176                if ((ocfs2_xattr_is_local(xe) &&
3177                     (value_size >= xi->xi_value_len)) ||
3178                    (!ocfs2_xattr_is_local(xe) &&
3179                     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3180                        goto out;
3181        }
3182
3183meta_guess:
3184        /* calculate metadata allocation. */
3185        if (di->i_xattr_loc) {
3186                if (!xbs->xattr_bh) {
3187                        ret = ocfs2_read_xattr_block(inode,
3188                                                     le64_to_cpu(di->i_xattr_loc),
3189                                                     &bh);
3190                        if (ret) {
3191                                mlog_errno(ret);
3192                                goto out;
3193                        }
3194
3195                        xb = (struct ocfs2_xattr_block *)bh->b_data;
3196                } else
3197                        xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3198
3199                /*
3200                 * If there is already an xattr tree, good, we can calculate
3201                 * like other b-trees. Otherwise we may have the chance of
3202                 * create a tree, the credit calculation is borrowed from
3203                 * ocfs2_calc_extend_credits with root_el = NULL. And the
3204                 * new tree will be cluster based, so no meta is needed.
3205                 */
3206                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3207                        struct ocfs2_extent_list *el =
3208                                 &xb->xb_attrs.xb_root.xt_list;
3209                        meta_add += ocfs2_extend_meta_needed(el);
3210                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3211                                                             el);
3212                } else
3213                        credits += OCFS2_SUBALLOC_ALLOC + 1;
3214
3215                /*
3216                 * This cluster will be used either for new bucket or for
3217                 * new xattr block.
3218                 * If the cluster size is the same as the bucket size, one
3219                 * more is needed since we may need to extend the bucket
3220                 * also.
3221                 */
3222                clusters_add += 1;
3223                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3224                if (OCFS2_XATTR_BUCKET_SIZE ==
3225                        OCFS2_SB(inode->i_sb)->s_clustersize) {
3226                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3227                        clusters_add += 1;
3228                }
3229        } else {
3230                credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3231                if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3232                        struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
3233                        meta_add += ocfs2_extend_meta_needed(el);
3234                        credits += ocfs2_calc_extend_credits(inode->i_sb,
3235                                                             el);
3236                } else {
3237                        meta_add += 1;
3238                }
3239        }
3240out:
3241        if (clusters_need)
3242                *clusters_need = clusters_add;
3243        if (meta_need)
3244                *meta_need = meta_add;
3245        if (credits_need)
3246                *credits_need = credits;
3247        brelse(bh);
3248        return ret;
3249}
3250
3251static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3252                                     struct ocfs2_dinode *di,
3253                                     struct ocfs2_xattr_info *xi,
3254                                     struct ocfs2_xattr_search *xis,
3255                                     struct ocfs2_xattr_search *xbs,
3256                                     struct ocfs2_xattr_set_ctxt *ctxt,
3257                                     int extra_meta,
3258                                     int *credits)
3259{
3260        int clusters_add, meta_add, ret;
3261        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3262
3263        memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3264
3265        ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3266
3267        ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3268                                        &clusters_add, &meta_add, credits);
3269        if (ret) {
3270                mlog_errno(ret);
3271                return ret;
3272        }
3273
3274        meta_add += extra_meta;
3275        trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
3276                                        clusters_add, *credits);
3277
3278        if (meta_add) {
3279                ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3280                                                        &ctxt->meta_ac);
3281                if (ret) {
3282                        mlog_errno(ret);
3283                        goto out;
3284                }
3285        }
3286
3287        if (clusters_add) {
3288                ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3289                if (ret)
3290                        mlog_errno(ret);
3291        }
3292out:
3293        if (ret) {
3294                if (ctxt->meta_ac) {
3295                        ocfs2_free_alloc_context(ctxt->meta_ac);
3296                        ctxt->meta_ac = NULL;
3297                }
3298
3299                /*
3300                 * We cannot have an error and a non null ctxt->data_ac.
3301                 */
3302        }
3303
3304        return ret;
3305}
3306
3307static int __ocfs2_xattr_set_handle(struct inode *inode,
3308                                    struct ocfs2_dinode *di,
3309                                    struct ocfs2_xattr_info *xi,
3310                                    struct ocfs2_xattr_search *xis,
3311                                    struct ocfs2_xattr_search *xbs,
3312                                    struct ocfs2_xattr_set_ctxt *ctxt)
3313{
3314        int ret = 0, credits, old_found;
3315
3316        if (!xi->xi_value) {
3317                /* Remove existing extended attribute */
3318                if (!xis->not_found)
3319                        ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3320                else if (!xbs->not_found)
3321                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3322        } else {
3323                /* We always try to set extended attribute into inode first*/
3324                ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3325                if (!ret && !xbs->not_found) {
3326                        /*
3327                         * If succeed and that extended attribute existing in
3328                         * external block, then we will remove it.
3329                         */
3330                        xi->xi_value = NULL;
3331                        xi->xi_value_len = 0;
3332
3333                        old_found = xis->not_found;
3334                        xis->not_found = -ENODATA;
3335                        ret = ocfs2_calc_xattr_set_need(inode,
3336                                                        di,
3337                                                        xi,
3338                                                        xis,
3339                                                        xbs,
3340                                                        NULL,
3341                                                        NULL,
3342                                                        &credits);
3343                        xis->not_found = old_found;
3344                        if (ret) {
3345                                mlog_errno(ret);
3346                                goto out;
3347                        }
3348
3349                        ret = ocfs2_extend_trans(ctxt->handle, credits);
3350                        if (ret) {
3351                                mlog_errno(ret);
3352                                goto out;
3353                        }
3354                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3355                } else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3356                        if (di->i_xattr_loc && !xbs->xattr_bh) {
3357                                ret = ocfs2_xattr_block_find(inode,
3358                                                             xi->xi_name_index,
3359                                                             xi->xi_name, xbs);
3360                                if (ret)
3361                                        goto out;
3362
3363                                old_found = xis->not_found;
3364                                xis->not_found = -ENODATA;
3365                                ret = ocfs2_calc_xattr_set_need(inode,
3366                                                                di,
3367                                                                xi,
3368                                                                xis,
3369                                                                xbs,
3370                                                                NULL,
3371                                                                NULL,
3372                                                                &credits);
3373                                xis->not_found = old_found;
3374                                if (ret) {
3375                                        mlog_errno(ret);
3376                                        goto out;
3377                                }
3378
3379                                ret = ocfs2_extend_trans(ctxt->handle, credits);
3380                                if (ret) {
3381                                        mlog_errno(ret);
3382                                        goto out;
3383                                }
3384                        }
3385                        /*
3386                         * If no space in inode, we will set extended attribute
3387                         * into external block.
3388                         */
3389                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3390                        if (ret)
3391                                goto out;
3392                        if (!xis->not_found) {
3393                                /*
3394                                 * If succeed and that extended attribute
3395                                 * existing in inode, we will remove it.
3396                                 */
3397                                xi->xi_value = NULL;
3398                                xi->xi_value_len = 0;
3399                                xbs->not_found = -ENODATA;
3400                                ret = ocfs2_calc_xattr_set_need(inode,
3401                                                                di,
3402                                                                xi,
3403                                                                xis,
3404                                                                xbs,
3405                                                                NULL,
3406                                                                NULL,
3407                                                                &credits);
3408                                if (ret) {
3409                                        mlog_errno(ret);
3410                                        goto out;
3411                                }
3412
3413                                ret = ocfs2_extend_trans(ctxt->handle, credits);
3414                                if (ret) {
3415                                        mlog_errno(ret);
3416                                        goto out;
3417                                }
3418                                ret = ocfs2_xattr_ibody_set(inode, xi,
3419                                                            xis, ctxt);
3420                        }
3421                }
3422        }
3423
3424        if (!ret) {
3425                /* Update inode ctime. */
3426                ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3427                                              xis->inode_bh,
3428                                              OCFS2_JOURNAL_ACCESS_WRITE);
3429                if (ret) {
3430                        mlog_errno(ret);
3431                        goto out;
3432                }
3433
3434                inode->i_ctime = CURRENT_TIME;
3435                di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3436                di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3437                ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3438        }
3439out:
3440        return ret;
3441}
3442
3443/*
3444 * This function only called duing creating inode
3445 * for init security/acl xattrs of the new inode.
3446 * All transanction credits have been reserved in mknod.
3447 */
3448int ocfs2_xattr_set_handle(handle_t *handle,
3449                           struct inode *inode,
3450                           struct buffer_head *di_bh,
3451                           int name_index,
3452                           const char *name,
3453                           const void *value,
3454                           size_t value_len,
3455                           int flags,
3456                           struct ocfs2_alloc_context *meta_ac,
3457                           struct ocfs2_alloc_context *data_ac)
3458{
3459        struct ocfs2_dinode *di;
3460        int ret;
3461
3462        struct ocfs2_xattr_info xi = {
3463                .xi_name_index = name_index,
3464                .xi_name = name,
3465                .xi_name_len = strlen(name),
3466                .xi_value = value,
3467                .xi_value_len = value_len,
3468        };
3469
3470        struct ocfs2_xattr_search xis = {
3471                .not_found = -ENODATA,
3472        };
3473
3474        struct ocfs2_xattr_search xbs = {
3475                .not_found = -ENODATA,
3476        };
3477
3478        struct ocfs2_xattr_set_ctxt ctxt = {
3479                .handle = handle,
3480                .meta_ac = meta_ac,
3481                .data_ac = data_ac,
3482        };
3483
3484        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3485                return -EOPNOTSUPP;
3486
3487        /*
3488         * In extreme situation, may need xattr bucket when
3489         * block size is too small. And we have already reserved
3490         * the credits for bucket in mknod.
3491         */
3492        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3493                xbs.bucket = ocfs2_xattr_bucket_new(inode);
3494                if (!xbs.bucket) {
3495                        mlog_errno(-ENOMEM);
3496                        return -ENOMEM;
3497                }
3498        }
3499
3500        xis.inode_bh = xbs.inode_bh = di_bh;
3501        di = (struct ocfs2_dinode *)di_bh->b_data;
3502
3503        down_write(&OCFS2_I(inode)->ip_xattr_sem);
3504
3505        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3506        if (ret)
3507                goto cleanup;
3508        if (xis.not_found) {
3509                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3510                if (ret)
3511                        goto cleanup;
3512        }
3513
3514        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3515
3516cleanup:
3517        up_write(&OCFS2_I(inode)->ip_xattr_sem);
3518        brelse(xbs.xattr_bh);
3519        ocfs2_xattr_bucket_free(xbs.bucket);
3520
3521        return ret;
3522}
3523
3524/*
3525 * ocfs2_xattr_set()
3526 *
3527 * Set, replace or remove an extended attribute for this inode.
3528 * value is NULL to remove an existing extended attribute, else either
3529 * create or replace an extended attribute.
3530 */
3531int ocfs2_xattr_set(struct inode *inode,
3532                    int name_index,
3533                    const char *name,
3534                    const void *value,
3535                    size_t value_len,
3536                    int flags)
3537{
3538        struct buffer_head *di_bh = NULL;
3539        struct ocfs2_dinode *di;
3540        int ret, credits, ref_meta = 0, ref_credits = 0;
3541        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3542        struct inode *tl_inode = osb->osb_tl_inode;
3543        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
3544        struct ocfs2_refcount_tree *ref_tree = NULL;
3545
3546        struct ocfs2_xattr_info xi = {
3547                .xi_name_index = name_index,
3548                .xi_name = name,
3549                .xi_name_len = strlen(name),
3550                .xi_value = value,
3551                .xi_value_len = value_len,
3552        };
3553
3554        struct ocfs2_xattr_search xis = {
3555                .not_found = -ENODATA,
3556        };
3557
3558        struct ocfs2_xattr_search xbs = {
3559                .not_found = -ENODATA,
3560        };
3561
3562        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3563                return -EOPNOTSUPP;
3564
3565        /*
3566         * Only xbs will be used on indexed trees.  xis doesn't need a
3567         * bucket.
3568         */
3569        xbs.bucket = ocfs2_xattr_bucket_new(inode);
3570        if (!xbs.bucket) {
3571                mlog_errno(-ENOMEM);
3572                return -ENOMEM;
3573        }
3574
3575        ret = ocfs2_inode_lock(inode, &di_bh, 1);
3576        if (ret < 0) {
3577                mlog_errno(ret);
3578                goto cleanup_nolock;
3579        }
3580        xis.inode_bh = xbs.inode_bh = di_bh;
3581        di = (struct ocfs2_dinode *)di_bh->b_data;
3582
3583        down_write(&OCFS2_I(inode)->ip_xattr_sem);
3584        /*
3585         * Scan inode and external block to find the same name
3586         * extended attribute and collect search information.
3587         */
3588        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3589        if (ret)
3590                goto cleanup;
3591        if (xis.not_found) {
3592                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3593                if (ret)
3594                        goto cleanup;
3595        }
3596
3597        if (xis.not_found && xbs.not_found) {
3598                ret = -ENODATA;
3599                if (flags & XATTR_REPLACE)
3600                        goto cleanup;
3601                ret = 0;
3602                if (!value)
3603                        goto cleanup;
3604        } else {
3605                ret = -EEXIST;
3606                if (flags & XATTR_CREATE)
3607                        goto cleanup;
3608        }
3609
3610        /* Check whether the value is refcounted and do some preparation. */
3611        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3612            (!xis.not_found || !xbs.not_found)) {
3613                ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3614                                                   &xis, &xbs, &ref_tree,
3615                                                   &ref_meta, &ref_credits);
3616                if (ret) {
3617                        mlog_errno(ret);
3618                        goto cleanup;
3619                }
3620        }
3621
3622        inode_lock(tl_inode);
3623
3624        if (ocfs2_truncate_log_needs_flush(osb)) {
3625                ret = __ocfs2_flush_truncate_log(osb);
3626                if (ret < 0) {
3627                        inode_unlock(tl_inode);
3628                        mlog_errno(ret);
3629                        goto cleanup;
3630                }
3631        }
3632        inode_unlock(tl_inode);
3633
3634        ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3635                                        &xbs, &ctxt, ref_meta, &credits);
3636        if (ret) {
3637                mlog_errno(ret);
3638                goto cleanup;
3639        }
3640
3641        /* we need to update inode's ctime field, so add credit for it. */
3642        credits += OCFS2_INODE_UPDATE_CREDITS;
3643        ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3644        if (IS_ERR(ctxt.handle)) {
3645                ret = PTR_ERR(ctxt.handle);
3646                mlog_errno(ret);
3647                goto out_free_ac;
3648        }
3649
3650        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3651        ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
3652
3653        ocfs2_commit_trans(osb, ctxt.handle);
3654
3655out_free_ac:
3656        if (ctxt.data_ac)
3657                ocfs2_free_alloc_context(ctxt.data_ac);
3658        if (ctxt.meta_ac)
3659                ocfs2_free_alloc_context(ctxt.meta_ac);
3660        if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3661                ocfs2_schedule_truncate_log_flush(osb, 1);
3662        ocfs2_run_deallocs(osb, &ctxt.dealloc);
3663
3664cleanup:
3665        if (ref_tree)
3666                ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3667        up_write(&OCFS2_I(inode)->ip_xattr_sem);
3668        if (!value && !ret) {
3669                ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3670                if (ret)
3671                        mlog_errno(ret);
3672        }
3673        ocfs2_inode_unlock(inode, 1);
3674cleanup_nolock:
3675        brelse(di_bh);
3676        brelse(xbs.xattr_bh);
3677        ocfs2_xattr_bucket_free(xbs.bucket);
3678
3679        return ret;
3680}
3681
3682/*
3683 * Find the xattr extent rec which may contains name_hash.
3684 * e_cpos will be the first name hash of the xattr rec.
3685 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3686 */
3687static int ocfs2_xattr_get_rec(struct inode *inode,
3688                               u32 name_hash,
3689                               u64 *p_blkno,
3690                               u32 *e_cpos,
3691                               u32 *num_clusters,
3692                               struct ocfs2_extent_list *el)
3693{
3694        int ret = 0, i;
3695        struct buffer_head *eb_bh = NULL;
3696        struct ocfs2_extent_block *eb;
3697        struct ocfs2_extent_rec *rec = NULL;
3698        u64 e_blkno = 0;
3699
3700        if (el->l_tree_depth) {
3701                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3702                                      &eb_bh);
3703                if (ret) {
3704                        mlog_errno(ret);
3705                        goto out;
3706                }
3707
3708                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3709                el = &eb->h_list;
3710
3711                if (el->l_tree_depth) {
3712                        ret = ocfs2_error(inode->i_sb,
3713                                          "Inode %lu has non zero tree depth in xattr tree block %llu\n",
3714                                          inode->i_ino,
3715                                          (unsigned long long)eb_bh->b_blocknr);
3716                        goto out;
3717                }
3718        }
3719
3720        for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3721                rec = &el->l_recs[i];
3722
3723                if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3724                        e_blkno = le64_to_cpu(rec->e_blkno);
3725                        break;
3726                }
3727        }
3728
3729        if (!e_blkno) {
3730                ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
3731                                  inode->i_ino,
3732                                  le32_to_cpu(rec->e_cpos),
3733                                  ocfs2_rec_clusters(el, rec));
3734                goto out;
3735        }
3736
3737        *p_blkno = le64_to_cpu(rec->e_blkno);
3738        *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3739        if (e_cpos)
3740                *e_cpos = le32_to_cpu(rec->e_cpos);
3741out:
3742        brelse(eb_bh);
3743        return ret;
3744}
3745
/*
 * Function type taking an inode, one xattr bucket and an untyped
 * caller-supplied argument, and returning an int.
 */
typedef int xattr_bucket_func(struct inode *inode,
                              struct ocfs2_xattr_bucket *bucket,
                              void *para);
3749
3750static int ocfs2_find_xe_in_bucket(struct inode *inode,
3751                                   struct ocfs2_xattr_bucket *bucket,
3752                                   int name_index,
3753                                   const char *name,
3754                                   u32 name_hash,
3755                                   u16 *xe_index,
3756                                   int *found)
3757{
3758        int i, ret = 0, cmp = 1, block_off, new_offset;
3759        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3760        size_t name_len = strlen(name);
3761        struct ocfs2_xattr_entry *xe = NULL;
3762        char *xe_name;
3763
3764        /*
3765         * We don't use binary search in the bucket because there
3766         * may be multiple entries with the same name hash.
3767         */
3768        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3769                xe = &xh->xh_entries[i];
3770
3771                if (name_hash > le32_to_cpu(xe->xe_name_hash))
3772                        continue;
3773                else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3774                        break;
3775
3776                cmp = name_index - ocfs2_xattr_get_type(xe);
3777                if (!cmp)
3778                        cmp = name_len - xe->xe_name_len;
3779                if (cmp)
3780                        continue;
3781
3782                ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3783                                                        xh,
3784                                                        i,
3785                                                        &block_off,
3786                                                        &new_offset);
3787                if (ret) {
3788                        mlog_errno(ret);
3789                        break;
3790                }
3791
3792
3793                xe_name = bucket_block(bucket, block_off) + new_offset;
3794                if (!memcmp(name, xe_name, name_len)) {
3795                        *xe_index = i;
3796                        *found = 1;
3797                        ret = 0;
3798                        break;
3799                }
3800        }
3801
3802        return ret;
3803}
3804
3805/*
3806 * Find the specified xattr entry in a series of buckets.
3807 * This series start from p_blkno and last for num_clusters.
3808 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3809 * the num of the valid buckets.
3810 *
3811 * Return the buffer_head this xattr should reside in. And if the xattr's
3812 * hash is in the gap of 2 buckets, return the lower bucket.
3813 */
3814static int ocfs2_xattr_bucket_find(struct inode *inode,
3815                                   int name_index,
3816                                   const char *name,
3817                                   u32 name_hash,
3818                                   u64 p_blkno,
3819                                   u32 first_hash,
3820                                   u32 num_clusters,
3821                                   struct ocfs2_xattr_search *xs)
3822{
3823        int ret, found = 0;
3824        struct ocfs2_xattr_header *xh = NULL;
3825        struct ocfs2_xattr_entry *xe = NULL;
3826        u16 index = 0;
3827        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3828        int low_bucket = 0, bucket, high_bucket;
3829        struct ocfs2_xattr_bucket *search;
3830        u32 last_hash;
3831        u64 blkno, lower_blkno = 0;
3832
3833        search = ocfs2_xattr_bucket_new(inode);
3834        if (!search) {
3835                ret = -ENOMEM;
3836                mlog_errno(ret);
3837                goto out;
3838        }
3839
3840        ret = ocfs2_read_xattr_bucket(search, p_blkno);
3841        if (ret) {
3842                mlog_errno(ret);
3843                goto out;
3844        }
3845
3846        xh = bucket_xh(search);
3847        high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3848        while (low_bucket <= high_bucket) {
3849                ocfs2_xattr_bucket_relse(search);
3850
3851                bucket = (low_bucket + high_bucket) / 2;
3852                blkno = p_blkno + bucket * blk_per_bucket;
3853                ret = ocfs2_read_xattr_bucket(search, blkno);
3854                if (ret) {
3855                        mlog_errno(ret);
3856                        goto out;
3857                }
3858
3859                xh = bucket_xh(search);
3860                xe = &xh->xh_entries[0];
3861                if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3862                        high_bucket = bucket - 1;
3863                        continue;
3864                }
3865
3866                /*
3867                 * Check whether the hash of the last entry in our
3868                 * bucket is larger than the search one. for an empty
3869                 * bucket, the last one is also the first one.
3870                 */
3871                if (xh->xh_count)
3872                        xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3873
3874                last_hash = le32_to_cpu(xe->xe_name_hash);
3875
3876                /* record lower_blkno which may be the insert place. */
3877                lower_blkno = blkno;
3878
3879                if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3880                        low_bucket = bucket + 1;
3881                        continue;
3882                }
3883
3884                /* the searched xattr should reside in this bucket if exists. */
3885                ret = ocfs2_find_xe_in_bucket(inode, search,
3886                                              name_index, name, name_hash,
3887                                              &index, &found);
3888                if (ret) {
3889                        mlog_errno(ret);
3890                        goto out;
3891                }
3892                break;
3893        }
3894
3895        /*
3896         * Record the bucket we have found.
3897         * When the xattr's hash value is in the gap of 2 buckets, we will
3898         * always set it to the previous bucket.
3899         */
3900        if (!lower_blkno)
3901                lower_blkno = p_blkno;
3902
3903        /* This should be in cache - we just read it during the search */
3904        ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3905        if (ret) {
3906                mlog_errno(ret);
3907                goto out;
3908        }
3909
3910        xs->header = bucket_xh(xs->bucket);
3911        xs->base = bucket_block(xs->bucket, 0);
3912        xs->end = xs->base + inode->i_sb->s_blocksize;
3913
3914        if (found) {
3915                xs->here = &xs->header->xh_entries[index];
3916                trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
3917                        name, name_index, name_hash,
3918                        (unsigned long long)bucket_blkno(xs->bucket),
3919                        index);
3920        } else
3921                ret = -ENODATA;
3922
3923out:
3924        ocfs2_xattr_bucket_free(search);
3925        return ret;
3926}
3927
3928static int ocfs2_xattr_index_block_find(struct inode *inode,
3929                                        struct buffer_head *root_bh,
3930                                        int name_index,
3931                                        const char *name,
3932                                        struct ocfs2_xattr_search *xs)
3933{
3934        int ret;
3935        struct ocfs2_xattr_block *xb =
3936                        (struct ocfs2_xattr_block *)root_bh->b_data;
3937        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3938        struct ocfs2_extent_list *el = &xb_root->xt_list;
3939        u64 p_blkno = 0;
3940        u32 first_hash, num_clusters = 0;
3941        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3942
3943        if (le16_to_cpu(el->l_next_free_rec) == 0)
3944                return -ENODATA;
3945
3946        trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
3947                                        name, name_index, name_hash,
3948                                        (unsigned long long)root_bh->b_blocknr,
3949                                        -1);
3950
3951        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3952                                  &num_clusters, el);
3953        if (ret) {
3954                mlog_errno(ret);
3955                goto out;
3956        }
3957
3958        BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3959
3960        trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
3961                                        name, name_index, first_hash,
3962                                        (unsigned long long)p_blkno,
3963                                        num_clusters);
3964
3965        ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3966                                      p_blkno, first_hash, num_clusters, xs);
3967
3968out:
3969        return ret;
3970}
3971
3972static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3973                                       u64 blkno,
3974                                       u32 clusters,
3975                                       xattr_bucket_func *func,
3976                                       void *para)
3977{
3978        int i, ret = 0;
3979        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3980        u32 num_buckets = clusters * bpc;
3981        struct ocfs2_xattr_bucket *bucket;
3982
3983        bucket = ocfs2_xattr_bucket_new(inode);
3984        if (!bucket) {
3985                mlog_errno(-ENOMEM);
3986                return -ENOMEM;
3987        }
3988
3989        trace_ocfs2_iterate_xattr_buckets(
3990                (unsigned long long)OCFS2_I(inode)->ip_blkno,
3991                (unsigned long long)blkno, clusters);
3992
3993        for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3994                ret = ocfs2_read_xattr_bucket(bucket, blkno);
3995                if (ret) {
3996                        mlog_errno(ret);
3997                        break;
3998                }
3999
4000                /*
4001                 * The real bucket num in this series of blocks is stored
4002                 * in the 1st bucket.
4003                 */
4004                if (i == 0)
4005                        num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
4006
4007                trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
4008                     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
4009                if (func) {
4010                        ret = func(inode, bucket, para);
4011                        if (ret && ret != -ERANGE)
4012                                mlog_errno(ret);
4013                        /* Fall through to bucket_relse() */
4014                }
4015
4016                ocfs2_xattr_bucket_relse(bucket);
4017                if (ret)
4018                        break;
4019        }
4020
4021        ocfs2_xattr_bucket_free(bucket);
4022        return ret;
4023}
4024
/*
 * State carried through a listing walk of the xattr tree; it is passed
 * to the per-bucket callback as its opaque "para" argument.
 */
struct ocfs2_xattr_tree_list {
	/* Caller-supplied output buffer, forwarded to ocfs2_xattr_list_entry(). */
	char *buffer;
	/* Size of @buffer as given by the caller. */
	size_t buffer_size;
	/* Running result updated by ocfs2_xattr_list_entry(); returned at the end. */
	size_t result;
};
4030
4031static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
4032                                             struct ocfs2_xattr_header *xh,
4033                                             int index,
4034                                             int *block_off,
4035                                             int *new_offset)
4036{
4037        u16 name_offset;
4038
4039        if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4040                return -EINVAL;
4041
4042        name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4043
4044        *block_off = name_offset >> sb->s_blocksize_bits;
4045        *new_offset = name_offset % sb->s_blocksize;
4046
4047        return 0;
4048}
4049
4050static int ocfs2_list_xattr_bucket(struct inode *inode,
4051                                   struct ocfs2_xattr_bucket *bucket,
4052                                   void *para)
4053{
4054        int ret = 0, type;
4055        struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4056        int i, block_off, new_offset;
4057        const char *name;
4058
4059        for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4060                struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4061                type = ocfs2_xattr_get_type(entry);
4062
4063                ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4064                                                        bucket_xh(bucket),
4065                                                        i,
4066                                                        &block_off,
4067                                                        &new_offset);
4068                if (ret)
4069                        break;
4070
4071                name = (const char *)bucket_block(bucket, block_off) +
4072                        new_offset;
4073                ret = ocfs2_xattr_list_entry(inode->i_sb,
4074                                             xl->buffer,
4075                                             xl->buffer_size,
4076                                             &xl->result,
4077                                             type, name,
4078                                             entry->xe_name_len);
4079                if (ret)
4080                        break;
4081        }
4082
4083        return ret;
4084}
4085
4086static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4087                                           struct buffer_head *blk_bh,
4088                                           xattr_tree_rec_func *rec_func,
4089                                           void *para)
4090{
4091        struct ocfs2_xattr_block *xb =
4092                        (struct ocfs2_xattr_block *)blk_bh->b_data;
4093        struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4094        int ret = 0;
4095        u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4096        u64 p_blkno = 0;
4097
4098        if (!el->l_next_free_rec || !rec_func)
4099                return 0;
4100
4101        while (name_hash > 0) {
4102                ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4103                                          &e_cpos, &num_clusters, el);
4104                if (ret) {
4105                        mlog_errno(ret);
4106                        break;
4107                }
4108
4109                ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4110                               num_clusters, para);
4111                if (ret) {
4112                        if (ret != -ERANGE)
4113                                mlog_errno(ret);
4114                        break;
4115                }
4116
4117                if (e_cpos == 0)
4118                        break;
4119
4120                name_hash = e_cpos - 1;
4121        }
4122
4123        return ret;
4124
4125}
4126
4127static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4128                                     struct buffer_head *root_bh,
4129                                     u64 blkno, u32 cpos, u32 len, void *para)
4130{
4131        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4132                                           ocfs2_list_xattr_bucket, para);
4133}
4134
4135static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4136                                             struct buffer_head *blk_bh,
4137                                             char *buffer,
4138                                             size_t buffer_size)
4139{
4140        int ret;
4141        struct ocfs2_xattr_tree_list xl = {
4142                .buffer = buffer,
4143                .buffer_size = buffer_size,
4144                .result = 0,
4145        };
4146
4147        ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4148                                              ocfs2_list_xattr_tree_rec, &xl);
4149        if (ret) {
4150                mlog_errno(ret);
4151                goto out;
4152        }
4153
4154        ret = xl.result;
4155out:
4156        return ret;
4157}
4158
4159static int cmp_xe(const void *a, const void *b)
4160{
4161        const struct ocfs2_xattr_entry *l = a, *r = b;
4162        u32 l_hash = le32_to_cpu(l->xe_name_hash);
4163        u32 r_hash = le32_to_cpu(r->xe_name_hash);
4164
4165        if (l_hash > r_hash)
4166                return 1;
4167        if (l_hash < r_hash)
4168                return -1;
4169        return 0;
4170}
4171
4172static void swap_xe(void *a, void *b, int size)
4173{
4174        struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4175
4176        tmp = *l;
4177        memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4178        memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4179}
4180
4181/*
4182 * When the ocfs2_xattr_block is filled up, new bucket will be created
4183 * and all the xattr entries will be moved to the new bucket.
4184 * The header goes at the start of the bucket, and the names+values are
4185 * filled from the end.  This is why *target starts as the last buffer.
4186 * Note: we need to sort the entries since they are not saved in order
4187 * in the ocfs2_xattr_block.
4188 */
4189static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4190                                           struct buffer_head *xb_bh,
4191                                           struct ocfs2_xattr_bucket *bucket)
4192{
4193        int i, blocksize = inode->i_sb->s_blocksize;
4194        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4195        u16 offset, size, off_change;
4196        struct ocfs2_xattr_entry *xe;
4197        struct ocfs2_xattr_block *xb =
4198                                (struct ocfs2_xattr_block *)xb_bh->b_data;
4199        struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4200        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4201        u16 count = le16_to_cpu(xb_xh->xh_count);
4202        char *src = xb_bh->b_data;
4203        char *target = bucket_block(bucket, blks - 1);
4204
4205        trace_ocfs2_cp_xattr_block_to_bucket_begin(
4206                                (unsigned long long)xb_bh->b_blocknr,
4207                                (unsigned long long)bucket_blkno(bucket));
4208
4209        for (i = 0; i < blks; i++)
4210                memset(bucket_block(bucket, i), 0, blocksize);
4211
4212        /*
4213         * Since the xe_name_offset is based on ocfs2_xattr_header,
4214         * there is a offset change corresponding to the change of
4215         * ocfs2_xattr_header's position.
4216         */
4217        off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4218        xe = &xb_xh->xh_entries[count - 1];
4219        offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4220        size = blocksize - offset;
4221
4222        /* copy all the names and values. */
4223        memcpy(target + offset, src + offset, size);
4224
4225        /* Init new header now. */
4226        xh->xh_count = xb_xh->xh_count;
4227        xh->xh_num_buckets = cpu_to_le16(1);
4228        xh->xh_name_value_len = cpu_to_le16(size);
4229        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4230
4231        /* copy all the entries. */
4232        target = bucket_block(bucket, 0);
4233        offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4234        size = count * sizeof(struct ocfs2_xattr_entry);
4235        memcpy(target + offset, (char *)xb_xh + offset, size);
4236
4237        /* Change the xe offset for all the xe because of the move. */
4238        off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4239                 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4240        for (i = 0; i < count; i++)
4241                le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4242
4243        trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
4244
4245        sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4246             cmp_xe, swap_xe);
4247}
4248
4249/*
4250 * After we move xattr from block to index btree, we have to
4251 * update ocfs2_xattr_search to the new xe and base.
4252 *
4253 * When the entry is in xattr block, xattr_bh indicates the storage place.
4254 * While if the entry is in index b-tree, "bucket" indicates the
4255 * real place of the xattr.
4256 */
4257static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4258                                            struct ocfs2_xattr_search *xs,
4259                                            struct buffer_head *old_bh)
4260{
4261        char *buf = old_bh->b_data;
4262        struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4263        struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4264        int i;
4265
4266        xs->header = bucket_xh(xs->bucket);
4267        xs->base = bucket_block(xs->bucket, 0);
4268        xs->end = xs->base + inode->i_sb->s_blocksize;
4269
4270        if (xs->not_found)
4271                return;
4272
4273        i = xs->here - old_xh->xh_entries;
4274        xs->here = &xs->header->xh_entries[i];
4275}
4276
4277static int ocfs2_xattr_create_index_block(struct inode *inode,
4278                                          struct ocfs2_xattr_search *xs,
4279                                          struct ocfs2_xattr_set_ctxt *ctxt)
4280{
4281        int ret;
4282        u32 bit_off, len;
4283        u64 blkno;
4284        handle_t *handle = ctxt->handle;
4285        struct ocfs2_inode_info *oi = OCFS2_I(inode);
4286        struct buffer_head *xb_bh = xs->xattr_bh;
4287        struct ocfs2_xattr_block *xb =
4288                        (struct ocfs2_xattr_block *)xb_bh->b_data;
4289        struct ocfs2_xattr_tree_root *xr;
4290        u16 xb_flags = le16_to_cpu(xb->xb_flags);
4291
4292        trace_ocfs2_xattr_create_index_block_begin(
4293                                (unsigned long long)xb_bh->b_blocknr);
4294
4295        BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4296        BUG_ON(!xs->bucket);
4297
4298        /*
4299         * XXX:
4300         * We can use this lock for now, and maybe move to a dedicated mutex
4301         * if performance becomes a problem later.
4302         */
4303        down_write(&oi->ip_alloc_sem);
4304
4305        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4306                                      OCFS2_JOURNAL_ACCESS_WRITE);
4307        if (ret) {
4308                mlog_errno(ret);
4309                goto out;
4310        }
4311
4312        ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4313                                     1, 1, &bit_off, &len);
4314        if (ret) {
4315                mlog_errno(ret);
4316                goto out;
4317        }
4318
4319        /*
4320         * The bucket may spread in many blocks, and
4321         * we will only touch the 1st block and the last block
4322         * in the whole bucket(one for entry and one for data).
4323         */
4324        blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4325
4326        trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
4327
4328        ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
4329        if (ret) {
4330                mlog_errno(ret);
4331                goto out;
4332        }
4333
4334        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4335                                                OCFS2_JOURNAL_ACCESS_CREATE);
4336        if (ret) {
4337                mlog_errno(ret);
4338                goto out;
4339        }
4340
4341        ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4342        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4343
4344        ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4345
4346        /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4347        memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4348               offsetof(struct ocfs2_xattr_block, xb_attrs));
4349
4350        xr = &xb->xb_attrs.xb_root;
4351        xr->xt_clusters = cpu_to_le32(1);
4352        xr->xt_last_eb_blk = 0;
4353        xr->xt_list.l_tree_depth = 0;
4354        xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4355        xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4356
4357        xr->xt_list.l_recs[0].e_cpos = 0;
4358        xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4359        xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4360
4361        xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4362
4363        ocfs2_journal_dirty(handle, xb_bh);
4364
4365out:
4366        up_write(&oi->ip_alloc_sem);
4367
4368        return ret;
4369}
4370
4371static int cmp_xe_offset(const void *a, const void *b)
4372{
4373        const struct ocfs2_xattr_entry *l = a, *r = b;
4374        u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4375        u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4376
4377        if (l_name_offset < r_name_offset)
4378                return 1;
4379        if (l_name_offset > r_name_offset)
4380                return -1;
4381        return 0;
4382}
4383
4384/*
4385 * defrag a xattr bucket if we find that the bucket has some
4386 * holes beteen name/value pairs.
4387 * We will move all the name/value pairs to the end of the bucket
4388 * so that we can spare some space for insertion.
4389 */
4390static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4391                                     handle_t *handle,
4392                                     struct ocfs2_xattr_bucket *bucket)
4393{
4394        int ret, i;
4395        size_t end, offset, len;
4396        struct ocfs2_xattr_header *xh;
4397        char *entries, *buf, *bucket_buf = NULL;
4398        u64 blkno = bucket_blkno(bucket);
4399        u16 xh_free_start;
4400        size_t blocksize = inode->i_sb->s_blocksize;
4401        struct ocfs2_xattr_entry *xe;
4402
4403        /*
4404         * In order to make the operation more efficient and generic,
4405         * we copy all the blocks into a contiguous memory and do the
4406         * defragment there, so if anything is error, we will not touch
4407         * the real block.
4408         */
4409        bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4410        if (!bucket_buf) {
4411                ret = -EIO;
4412                goto out;
4413        }
4414
4415        buf = bucket_buf;
4416        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4417                memcpy(buf, bucket_block(bucket, i), blocksize);
4418
4419        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4420                                                OCFS2_JOURNAL_ACCESS_WRITE);
4421        if (ret < 0) {
4422                mlog_errno(ret);
4423                goto out;
4424        }
4425
4426        xh = (struct ocfs2_xattr_header *)bucket_buf;
4427        entries = (char *)xh->xh_entries;
4428        xh_free_start = le16_to_cpu(xh->xh_free_start);
4429
4430        trace_ocfs2_defrag_xattr_bucket(
4431             (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4432             xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4433
4434        /*
4435         * sort all the entries by their offset.
4436         * the largest will be the first, so that we can
4437         * move them to the end one by one.
4438         */
4439        sort(entries, le16_to_cpu(xh->xh_count),
4440             sizeof(struct ocfs2_xattr_entry),
4441             cmp_xe_offset, swap_xe);
4442
4443        /* Move all name/values to the end of the bucket. */
4444        xe = xh->xh_entries;
4445        end = OCFS2_XATTR_BUCKET_SIZE;
4446        for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4447                offset = le16_to_cpu(xe->xe_name_offset);
4448                len = namevalue_size_xe(xe);
4449
4450                /*
4451                 * We must make sure that the name/value pair
4452                 * exist in the same block. So adjust end to
4453                 * the previous block end if needed.
4454                 */
4455                if (((end - len) / blocksize !=
4456                        (end - 1) / blocksize))
4457                        end = end - end % blocksize;
4458
4459                if (end > offset + len) {
4460                        memmove(bucket_buf + end - len,
4461                                bucket_buf + offset, len);
4462                        xe->xe_name_offset = cpu_to_le16(end - len);
4463                }
4464
4465                mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4466                                "bucket %llu\n", (unsigned long long)blkno);
4467
4468                end -= len;
4469        }
4470
4471        mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4472                        "bucket %llu\n", (unsigned long long)blkno);
4473
4474        if (xh_free_start == end)
4475                goto out;
4476
4477        memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4478        xh->xh_free_start = cpu_to_le16(end);
4479
4480        /* sort the entries by their name_hash. */
4481        sort(entries, le16_to_cpu(xh->xh_count),
4482             sizeof(struct ocfs2_xattr_entry),
4483             cmp_xe, swap_xe);
4484
4485        buf = bucket_buf;
4486        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4487                memcpy(bucket_block(bucket, i), buf, blocksize);
4488        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4489
4490out:
4491        kfree(bucket_buf);
4492        return ret;
4493}
4494
4495/*
4496 * prev_blkno points to the start of an existing extent.  new_blkno
4497 * points to a newly allocated extent.  Because we know each of our
4498 * clusters contains more than bucket, we can easily split one cluster
4499 * at a bucket boundary.  So we take the last cluster of the existing
4500 * extent and split it down the middle.  We move the last half of the
4501 * buckets in the last cluster of the existing extent over to the new
4502 * extent.
4503 *
4504 * first_bh is the buffer at prev_blkno so we can update the existing
4505 * extent's bucket count.  header_bh is the bucket were we were hoping
4506 * to insert our xattr.  If the bucket move places the target in the new
4507 * extent, we'll update first_bh and header_bh after modifying the old
4508 * extent.
4509 *
4510 * first_hash will be set as the 1st xe's name_hash in the new extent.
4511 */
4512static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4513                                               handle_t *handle,
4514                                               struct ocfs2_xattr_bucket *first,
4515                                               struct ocfs2_xattr_bucket *target,
4516                                               u64 new_blkno,
4517                                               u32 num_clusters,
4518                                               u32 *first_hash)
4519{
4520        int ret;
4521        struct super_block *sb = inode->i_sb;
4522        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4523        int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4524        int to_move = num_buckets / 2;
4525        u64 src_blkno;
4526        u64 last_cluster_blkno = bucket_blkno(first) +
4527                ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4528
4529        BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4530        BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4531
4532        trace_ocfs2_mv_xattr_bucket_cross_cluster(
4533                                (unsigned long long)last_cluster_blkno,
4534                                (unsigned long long)new_blkno);
4535
4536        ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4537                                     last_cluster_blkno, new_blkno,
4538                                     to_move, first_hash);
4539        if (ret) {
4540                mlog_errno(ret);
4541                goto out;
4542        }
4543
4544        /* This is the first bucket that got moved */
4545        src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4546
4547        /*
4548         * If the target bucket was part of the moved buckets, we need to
4549         * update first and target.
4550         */
4551        if (bucket_blkno(target) >= src_blkno) {
4552                /* Find the block for the new target bucket */
4553                src_blkno = new_blkno +
4554                        (bucket_blkno(target) - src_blkno);
4555
4556                ocfs2_xattr_bucket_relse(first);
4557                ocfs2_xattr_bucket_relse(target);
4558
4559                /*
4560                 * These shouldn't fail - the buffers are in the
4561                 * journal from ocfs2_cp_xattr_bucket().
4562                 */
4563                ret = ocfs2_read_xattr_bucket(first, new_blkno);
4564                if (ret) {
4565                        mlog_errno(ret);
4566                        goto out;
4567                }
4568                ret = ocfs2_read_xattr_bucket(target, src_blkno);
4569                if (ret)
4570                        mlog_errno(ret);
4571
4572        }
4573
4574out:
4575        return ret;
4576}
4577
4578/*
4579 * Find the suitable pos when we divide a bucket into 2.
4580 * We have to make sure the xattrs with the same hash value exist
4581 * in the same bucket.
4582 *
4583 * If this ocfs2_xattr_header covers more than one hash value, find a
4584 * place where the hash value changes.  Try to find the most even split.
4585 * The most common case is that all entries have different hash values,
4586 * and the first check we make will find a place to split.
4587 */
4588static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4589{
4590        struct ocfs2_xattr_entry *entries = xh->xh_entries;
4591        int count = le16_to_cpu(xh->xh_count);
4592        int delta, middle = count / 2;
4593
4594        /*
4595         * We start at the middle.  Each step gets farther away in both
4596         * directions.  We therefore hit the change in hash value
4597         * nearest to the middle.  Note that this loop does not execute for
4598         * count < 2.
4599         */
4600        for (delta = 0; delta < middle; delta++) {
4601                /* Let's check delta earlier than middle */
4602                if (cmp_xe(&entries[middle - delta - 1],
4603                           &entries[middle - delta]))
4604                        return middle - delta;
4605
4606                /* For even counts, don't walk off the end */
4607                if ((middle + delta + 1) == count)
4608                        continue;
4609
4610                /* Now try delta past middle */
4611                if (cmp_xe(&entries[middle + delta],
4612                           &entries[middle + delta + 1]))
4613                        return middle + delta + 1;
4614        }
4615
4616        /* Every entry had the same hash */
4617        return count;
4618}
4619
4620/*
4621 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4622 * first_hash will record the 1st hash of the new bucket.
4623 *
4624 * Normally half of the xattrs will be moved.  But we have to make
4625 * sure that the xattrs with the same hash value are stored in the
4626 * same bucket. If all the xattrs in this bucket have the same hash
4627 * value, the new bucket will be initialized as an empty one and the
4628 * first_hash will be initialized as (hash_value+1).
4629 */
4630static int ocfs2_divide_xattr_bucket(struct inode *inode,
4631                                    handle_t *handle,
4632                                    u64 blk,
4633                                    u64 new_blk,
4634                                    u32 *first_hash,
4635                                    int new_bucket_head)
4636{
4637        int ret, i;
4638        int count, start, len, name_value_len = 0, name_offset = 0;
4639        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4640        struct ocfs2_xattr_header *xh;
4641        struct ocfs2_xattr_entry *xe;
4642        int blocksize = inode->i_sb->s_blocksize;
4643
4644        trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
4645                                              (unsigned long long)new_blk);
4646
4647        s_bucket = ocfs2_xattr_bucket_new(inode);
4648        t_bucket = ocfs2_xattr_bucket_new(inode);
4649        if (!s_bucket || !t_bucket) {
4650                ret = -ENOMEM;
4651                mlog_errno(ret);
4652                goto out;
4653        }
4654
4655        ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4656        if (ret) {
4657                mlog_errno(ret);
4658                goto out;
4659        }
4660
4661        ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4662                                                OCFS2_JOURNAL_ACCESS_WRITE);
4663        if (ret) {
4664                mlog_errno(ret);
4665                goto out;
4666        }
4667
4668        /*
4669         * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4670         * there's no need to read it.
4671         */
4672        ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
4673        if (ret) {
4674                mlog_errno(ret);
4675                goto out;
4676        }
4677
4678        /*
4679         * Hey, if we're overwriting t_bucket, what difference does
4680         * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4681         * same part of ocfs2_cp_xattr_bucket().
4682         */
4683        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4684                                                new_bucket_head ?
4685                                                OCFS2_JOURNAL_ACCESS_CREATE :
4686                                                OCFS2_JOURNAL_ACCESS_WRITE);
4687        if (ret) {
4688                mlog_errno(ret);
4689                goto out;
4690        }
4691
4692        xh = bucket_xh(s_bucket);
4693        count = le16_to_cpu(xh->xh_count);
4694        start = ocfs2_xattr_find_divide_pos(xh);
4695
4696        if (start == count) {
4697                xe = &xh->xh_entries[start-1];
4698
4699                /*
4700                 * initialized a new empty bucket here.
4701                 * The hash value is set as one larger than
4702                 * that of the last entry in the previous bucket.
4703                 */
4704                for (i = 0; i < t_bucket->bu_blocks; i++)
4705                        memset(bucket_block(t_bucket, i), 0, blocksize);
4706
4707                xh = bucket_xh(t_bucket);
4708                xh->xh_free_start = cpu_to_le16(blocksize);
4709                xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4710                le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4711
4712                goto set_num_buckets;
4713        }
4714
4715        /* copy the whole bucket to the new first. */
4716        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4717
4718        /* update the new bucket. */
4719        xh = bucket_xh(t_bucket);
4720
4721        /*
4722         * Calculate the total name/value len and xh_free_start for
4723         * the old bucket first.
4724         */
4725        name_offset = OCFS2_XATTR_BUCKET_SIZE;
4726        name_value_len = 0;
4727        for (i = 0; i < start; i++) {
4728                xe = &xh->xh_entries[i];
4729                name_value_len += namevalue_size_xe(xe);
4730                if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4731                        name_offset = le16_to_cpu(xe->xe_name_offset);
4732        }
4733
4734        /*
4735         * Now begin the modification to the new bucket.
4736         *
4737         * In the new bucket, We just move the xattr entry to the beginning
4738         * and don't touch the name/value. So there will be some holes in the
4739         * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4740         * called.
4741         */
4742        xe = &xh->xh_entries[start];
4743        len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4744        trace_ocfs2_divide_xattr_bucket_move(len,
4745                        (int)((char *)xe - (char *)xh),
4746                        (int)((char *)xh->xh_entries - (char *)xh));
4747        memmove((char *)xh->xh_entries, (char *)xe, len);
4748        xe = &xh->xh_entries[count - start];
4749        len = sizeof(struct ocfs2_xattr_entry) * start;
4750        memset((char *)xe, 0, len);
4751
4752        le16_add_cpu(&xh->xh_count, -start);
4753        le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4754
4755        /* Calculate xh_free_start for the new bucket. */
4756        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4757        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4758                xe = &xh->xh_entries[i];
4759                if (le16_to_cpu(xe->xe_name_offset) <
4760                    le16_to_cpu(xh->xh_free_start))
4761                        xh->xh_free_start = xe->xe_name_offset;
4762        }
4763
4764set_num_buckets:
4765        /* set xh->xh_num_buckets for the new xh. */
4766        if (new_bucket_head)
4767                xh->xh_num_buckets = cpu_to_le16(1);
4768        else
4769                xh->xh_num_buckets = 0;
4770
4771        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4772
4773        /* store the first_hash of the new bucket. */
4774        if (first_hash)
4775                *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4776
4777        /*
4778         * Now only update the 1st block of the old bucket.  If we
4779         * just added a new empty bucket, there is no need to modify
4780         * it.
4781         */
4782        if (start == count)
4783                goto out;
4784
4785        xh = bucket_xh(s_bucket);
4786        memset(&xh->xh_entries[start], 0,
4787               sizeof(struct ocfs2_xattr_entry) * (count - start));
4788        xh->xh_count = cpu_to_le16(start);
4789        xh->xh_free_start = cpu_to_le16(name_offset);
4790        xh->xh_name_value_len = cpu_to_le16(name_value_len);
4791
4792        ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4793
4794out:
4795        ocfs2_xattr_bucket_free(s_bucket);
4796        ocfs2_xattr_bucket_free(t_bucket);
4797
4798        return ret;
4799}
4800
4801/*
4802 * Copy xattr from one bucket to another bucket.
4803 *
4804 * The caller must make sure that the journal transaction
4805 * has enough space for journaling.
4806 */
4807static int ocfs2_cp_xattr_bucket(struct inode *inode,
4808                                 handle_t *handle,
4809                                 u64 s_blkno,
4810                                 u64 t_blkno,
4811                                 int t_is_new)
4812{
4813        int ret;
4814        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4815
4816        BUG_ON(s_blkno == t_blkno);
4817
4818        trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
4819                                    (unsigned long long)t_blkno,
4820                                    t_is_new);
4821
4822        s_bucket = ocfs2_xattr_bucket_new(inode);
4823        t_bucket = ocfs2_xattr_bucket_new(inode);
4824        if (!s_bucket || !t_bucket) {
4825                ret = -ENOMEM;
4826                mlog_errno(ret);
4827                goto out;
4828        }
4829
4830        ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4831        if (ret)
4832                goto out;
4833
4834        /*
4835         * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4836         * there's no need to read it.
4837         */
4838        ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
4839        if (ret)
4840                goto out;
4841
4842        /*
4843         * Hey, if we're overwriting t_bucket, what difference does
4844         * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4845         * cluster to fill, we came here from
4846         * ocfs2_mv_xattr_buckets(), and it is really new -
4847         * ACCESS_CREATE is required.  But we also might have moved data
4848         * out of t_bucket before extending back into it.
4849         * ocfs2_add_new_xattr_bucket() can do this - its call to
4850         * ocfs2_add_new_xattr_cluster() may have created a new extent
4851         * and copied out the end of the old extent.  Then it re-extends
4852         * the old extent back to create space for new xattrs.  That's
4853         * how we get here, and the bucket isn't really new.
4854         */
4855        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4856                                                t_is_new ?
4857                                                OCFS2_JOURNAL_ACCESS_CREATE :
4858                                                OCFS2_JOURNAL_ACCESS_WRITE);
4859        if (ret)
4860                goto out;
4861
4862        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4863        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4864
4865out:
4866        ocfs2_xattr_bucket_free(t_bucket);
4867        ocfs2_xattr_bucket_free(s_bucket);
4868
4869        return ret;
4870}
4871
4872/*
4873 * src_blk points to the start of an existing extent.  last_blk points to
4874 * last cluster in that extent.  to_blk points to a newly allocated
4875 * extent.  We copy the buckets from the cluster at last_blk to the new
4876 * extent.  If start_bucket is non-zero, we skip that many buckets before
4877 * we start copying.  The new extent's xh_num_buckets gets set to the
4878 * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4879 * by the same amount.
4880 */
4881static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4882                                  u64 src_blk, u64 last_blk, u64 to_blk,
4883                                  unsigned int start_bucket,
4884                                  u32 *first_hash)
4885{
4886        int i, ret, credits;
4887        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4888        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4889        int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4890        struct ocfs2_xattr_bucket *old_first, *new_first;
4891
4892        trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
4893                                     (unsigned long long)to_blk);
4894
4895        BUG_ON(start_bucket >= num_buckets);
4896        if (start_bucket) {
4897                num_buckets -= start_bucket;
4898                last_blk += (start_bucket * blks_per_bucket);
4899        }
4900
4901        /* The first bucket of the original extent */
4902        old_first = ocfs2_xattr_bucket_new(inode);
4903        /* The first bucket of the new extent */
4904        new_first = ocfs2_xattr_bucket_new(inode);
4905        if (!old_first || !new_first) {
4906                ret = -ENOMEM;
4907                mlog_errno(ret);
4908                goto out;
4909        }
4910
4911        ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4912        if (ret) {
4913                mlog_errno(ret);
4914                goto out;
4915        }
4916
4917        /*
4918         * We need to update the first bucket of the old extent and all
4919         * the buckets going to the new extent.
4920         */
4921        credits = ((num_buckets + 1) * blks_per_bucket);
4922        ret = ocfs2_extend_trans(handle, credits);
4923        if (ret) {
4924                mlog_errno(ret);
4925                goto out;
4926        }
4927
4928        ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4929                                                OCFS2_JOURNAL_ACCESS_WRITE);
4930        if (ret) {
4931                mlog_errno(ret);
4932                goto out;
4933        }
4934
4935        for (i = 0; i < num_buckets; i++) {
4936                ret = ocfs2_cp_xattr_bucket(inode, handle,
4937                                            last_blk + (i * blks_per_bucket),
4938                                            to_blk + (i * blks_per_bucket),
4939                                            1);
4940                if (ret) {
4941                        mlog_errno(ret);
4942                        goto out;
4943                }
4944        }
4945
4946        /*
4947         * Get the new bucket ready before we dirty anything
4948         * (This actually shouldn't fail, because we already dirtied
4949         * it once in ocfs2_cp_xattr_bucket()).
4950         */
4951        ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4952        if (ret) {
4953                mlog_errno(ret);
4954                goto out;
4955        }
4956        ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4957                                                OCFS2_JOURNAL_ACCESS_WRITE);
4958        if (ret) {
4959                mlog_errno(ret);
4960                goto out;
4961        }
4962
4963        /* Now update the headers */
4964        le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4965        ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4966
4967        bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4968        ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4969
4970        if (first_hash)
4971                *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4972
4973out:
4974        ocfs2_xattr_bucket_free(new_first);
4975        ocfs2_xattr_bucket_free(old_first);
4976        return ret;
4977}
4978
4979/*
4980 * Move some xattrs in this cluster to the new cluster.
4981 * This function should only be called when bucket size == cluster size.
4982 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4983 */
4984static int ocfs2_divide_xattr_cluster(struct inode *inode,
4985                                      handle_t *handle,
4986                                      u64 prev_blk,
4987                                      u64 new_blk,
4988                                      u32 *first_hash)
4989{
4990        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4991        int ret, credits = 2 * blk_per_bucket;
4992
4993        BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4994
4995        ret = ocfs2_extend_trans(handle, credits);
4996        if (ret) {
4997                mlog_errno(ret);
4998                return ret;
4999        }
5000
5001        /* Move half of the xattr in start_blk to the next bucket. */
5002        return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
5003                                          new_blk, first_hash, 1);
5004}
5005
5006/*
5007 * Move some xattrs from the old cluster to the new one since they are not
5008 * contiguous in ocfs2 xattr tree.
5009 *
5010 * new_blk starts a new separate cluster, and we will move some xattrs from
5011 * prev_blk to it. v_start will be set as the first name hash value in this
5012 * new cluster so that it can be used as e_cpos during tree insertion and
5013 * don't collide with our original b-tree operations. first_bh and header_bh
5014 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
5015 * to extend the insert bucket.
5016 *
5017 * The problem is how much xattr should we move to the new one and when should
5018 * we update first_bh and header_bh?
5019 * 1. If cluster size > bucket size, that means the previous cluster has more
5020 *    than 1 bucket, so just move half nums of bucket into the new cluster and
5021 *    update the first_bh and header_bh if the insert bucket has been moved
5022 *    to the new cluster.
5023 * 2. If cluster_size == bucket_size:
5024 *    a) If the previous extent rec has more than one cluster and the insert
5025 *       place isn't in the last cluster, copy the entire last cluster to the
5026 *       new one. This time, we don't need to upate the first_bh and header_bh
5027 *       since they will not be moved into the new cluster.
5028 *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
5029 *       the new one. And we set the extend flag to zero if the insert place is
5030 *       moved into the new allocated cluster since no extend is needed.
5031 */
5032static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5033                                            handle_t *handle,
5034                                            struct ocfs2_xattr_bucket *first,
5035                                            struct ocfs2_xattr_bucket *target,
5036                                            u64 new_blk,
5037                                            u32 prev_clusters,
5038                                            u32 *v_start,
5039                                            int *extend)
5040{
5041        int ret;
5042
5043        trace_ocfs2_adjust_xattr_cross_cluster(
5044                        (unsigned long long)bucket_blkno(first),
5045                        (unsigned long long)new_blk, prev_clusters);
5046
5047        if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5048                ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5049                                                          handle,
5050                                                          first, target,
5051                                                          new_blk,
5052                                                          prev_clusters,
5053                                                          v_start);
5054                if (ret)
5055                        mlog_errno(ret);
5056        } else {
5057                /* The start of the last cluster in the first extent */
5058                u64 last_blk = bucket_blkno(first) +
5059                        ((prev_clusters - 1) *
5060                         ocfs2_clusters_to_blocks(inode->i_sb, 1));
5061
5062                if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5063                        ret = ocfs2_mv_xattr_buckets(inode, handle,
5064                                                     bucket_blkno(first),
5065                                                     last_blk, new_blk, 0,
5066                                                     v_start);
5067                        if (ret)
5068                                mlog_errno(ret);
5069                } else {
5070                        ret = ocfs2_divide_xattr_cluster(inode, handle,
5071                                                         last_blk, new_blk,
5072                                                         v_start);
5073                        if (ret)
5074                                mlog_errno(ret);
5075
5076                        if ((bucket_blkno(target) == last_blk) && extend)
5077                                *extend = 0;
5078                }
5079        }
5080
5081        return ret;
5082}
5083
5084/*
5085 * Add a new cluster for xattr storage.
5086 *
5087 * If the new cluster is contiguous with the previous one, it will be
5088 * appended to the same extent record, and num_clusters will be updated.
5089 * If not, we will insert a new extent for it and move some xattrs in
5090 * the last cluster into the new allocated one.
5091 * We also need to limit the maximum size of a btree leaf, otherwise we'll
5092 * lose the benefits of hashing because we'll have to search large leaves.
5093 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5094 * if it's bigger).
5095 *
5096 * first_bh is the first block of the previous extent rec and header_bh
5097 * indicates the bucket we will insert the new xattrs. They will be updated
5098 * when the header_bh is moved into the new cluster.
5099 */
5100static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5101                                       struct buffer_head *root_bh,
5102                                       struct ocfs2_xattr_bucket *first,
5103                                       struct ocfs2_xattr_bucket *target,
5104                                       u32 *num_clusters,
5105                                       u32 prev_cpos,
5106                                       int *extend,
5107                                       struct ocfs2_xattr_set_ctxt *ctxt)
5108{
5109        int ret;
5110        u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5111        u32 prev_clusters = *num_clusters;
5112        u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5113        u64 block;
5114        handle_t *handle = ctxt->handle;
5115        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5116        struct ocfs2_extent_tree et;
5117
5118        trace_ocfs2_add_new_xattr_cluster_begin(
5119                (unsigned long long)OCFS2_I(inode)->ip_blkno,
5120                (unsigned long long)bucket_blkno(first),
5121                prev_cpos, prev_clusters);
5122
5123        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5124
5125        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5126                                      OCFS2_JOURNAL_ACCESS_WRITE);
5127        if (ret < 0) {
5128                mlog_errno(ret);
5129                goto leave;
5130        }
5131
5132        ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5133                                     clusters_to_add, &bit_off, &num_bits);
5134        if (ret < 0) {
5135                if (ret != -ENOSPC)
5136                        mlog_errno(ret);
5137                goto leave;
5138        }
5139
5140        BUG_ON(num_bits > clusters_to_add);
5141
5142        block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5143        trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);
5144
5145        if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5146            (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5147             OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5148                /*
5149                 * If this cluster is contiguous with the old one and
5150                 * adding this new cluster, we don't surpass the limit of
5151                 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5152                 * initialized and used like other buckets in the previous
5153                 * cluster.
5154                 * So add it as a contiguous one. The caller will handle
5155                 * its init process.
5156                 */
5157                v_start = prev_cpos + prev_clusters;
5158                *num_clusters = prev_clusters + num_bits;
5159        } else {
5160                ret = ocfs2_adjust_xattr_cross_cluster(inode,
5161                                                       handle,
5162                                                       first,
5163                                                       target,
5164                                                       block,
5165                                                       prev_clusters,
5166                                                       &v_start,
5167                                                       extend);
5168                if (ret) {
5169                        mlog_errno(ret);
5170                        goto leave;
5171                }
5172        }
5173
5174        trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
5175                                                 v_start, num_bits);
5176        ret = ocfs2_insert_extent(handle, &et, v_start, block,
5177                                  num_bits, 0, ctxt->meta_ac);
5178        if (ret < 0) {
5179                mlog_errno(ret);
5180                goto leave;
5181        }
5182
5183        ocfs2_journal_dirty(handle, root_bh);
5184
5185leave:
5186        return ret;
5187}
5188
5189/*
5190 * We are given an extent.  'first' is the bucket at the very front of
5191 * the extent.  The extent has space for an additional bucket past
5192 * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5193 * of the target bucket.  We wish to shift every bucket past the target
5194 * down one, filling in that additional space.  When we get back to the
5195 * target, we split the target between itself and the now-empty bucket
5196 * at target+1 (aka, target_blkno + blks_per_bucket).
5197 */
5198static int ocfs2_extend_xattr_bucket(struct inode *inode,
5199                                     handle_t *handle,
5200                                     struct ocfs2_xattr_bucket *first,
5201                                     u64 target_blk,
5202                                     u32 num_clusters)
5203{
5204        int ret, credits;
5205        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5206        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5207        u64 end_blk;
5208        u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5209
5210        trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
5211                                        (unsigned long long)bucket_blkno(first),
5212                                        num_clusters, new_bucket);
5213
5214        /* The extent must have room for an additional bucket */
5215        BUG_ON(new_bucket >=
5216               (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5217
5218        /* end_blk points to the last existing bucket */
5219        end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5220
5221        /*
5222         * end_blk is the start of the last existing bucket.
5223         * Thus, (end_blk - target_blk) covers the target bucket and
5224         * every bucket after it up to, but not including, the last
5225         * existing bucket.  Then we add the last existing bucket, the
5226         * new bucket, and the first bucket (3 * blk_per_bucket).
5227         */
5228        credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5229        ret = ocfs2_extend_trans(handle, credits);
5230        if (ret) {
5231                mlog_errno(ret);
5232                goto out;
5233        }
5234
5235        ret = ocfs2_xattr_bucket_journal_access(handle, first,
5236                                                OCFS2_JOURNAL_ACCESS_WRITE);
5237        if (ret) {
5238                mlog_errno(ret);
5239                goto out;
5240        }
5241
5242        while (end_blk != target_blk) {
5243                ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5244                                            end_blk + blk_per_bucket, 0);
5245                if (ret)
5246                        goto out;
5247                end_blk -= blk_per_bucket;
5248        }
5249
5250        /* Move half of the xattr in target_blkno to the next bucket. */
5251        ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5252                                        target_blk + blk_per_bucket, NULL, 0);
5253
5254        le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5255        ocfs2_xattr_bucket_journal_dirty(handle, first);
5256
5257out:
5258        return ret;
5259}
5260
5261/*
5262 * Add new xattr bucket in an extent record and adjust the buckets
5263 * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5264 * bucket we want to insert into.
5265 *
5266 * In the easy case, we will move all the buckets after target down by
5267 * one. Half of target's xattrs will be moved to the next bucket.
5268 *
5269 * If current cluster is full, we'll allocate a new one.  This may not
5270 * be contiguous.  The underlying calls will make sure that there is
5271 * space for the insert, shifting buckets around if necessary.
5272 * 'target' may be moved by those calls.
5273 */
5274static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5275                                      struct buffer_head *xb_bh,
5276                                      struct ocfs2_xattr_bucket *target,
5277                                      struct ocfs2_xattr_set_ctxt *ctxt)
5278{
5279        struct ocfs2_xattr_block *xb =
5280                        (struct ocfs2_xattr_block *)xb_bh->b_data;
5281        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5282        struct ocfs2_extent_list *el = &xb_root->xt_list;
5283        u32 name_hash =
5284                le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5285        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5286        int ret, num_buckets, extend = 1;
5287        u64 p_blkno;
5288        u32 e_cpos, num_clusters;
5289        /* The bucket at the front of the extent */
5290        struct ocfs2_xattr_bucket *first;
5291
5292        trace_ocfs2_add_new_xattr_bucket(
5293                                (unsigned long long)bucket_blkno(target));
5294
5295        /* The first bucket of the original extent */
5296        first = ocfs2_xattr_bucket_new(inode);
5297        if (!first) {
5298                ret = -ENOMEM;
5299                mlog_errno(ret);
5300                goto out;
5301        }
5302
5303        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5304                                  &num_clusters, el);
5305        if (ret) {
5306                mlog_errno(ret);
5307                goto out;
5308        }
5309
5310        ret = ocfs2_read_xattr_bucket(first, p_blkno);
5311        if (ret) {
5312                mlog_errno(ret);
5313                goto out;
5314        }
5315
5316        num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5317        if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5318                /*
5319                 * This can move first+target if the target bucket moves
5320                 * to the new extent.
5321                 */
5322                ret = ocfs2_add_new_xattr_cluster(inode,
5323                                                  xb_bh,
5324                                                  first,
5325                                                  target,
5326                                                  &num_clusters,
5327                                                  e_cpos,
5328                                                  &extend,
5329                                                  ctxt);
5330                if (ret) {
5331                        mlog_errno(ret);
5332                        goto out;
5333                }
5334        }
5335
5336        if (extend) {
5337                ret = ocfs2_extend_xattr_bucket(inode,
5338                                                ctxt->handle,
5339                                                first,
5340                                                bucket_blkno(target),
5341                                                num_clusters);
5342                if (ret)
5343                        mlog_errno(ret);
5344        }
5345
5346out:
5347        ocfs2_xattr_bucket_free(first);
5348
5349        return ret;
5350}
5351
5352/*
5353 * Truncate the specified xe_off entry in xattr bucket.
5354 * bucket is indicated by header_bh and len is the new length.
5355 * Both the ocfs2_xattr_value_root and the entry will be updated here.
5356 *
5357 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5358 */
5359static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5360                                             struct ocfs2_xattr_bucket *bucket,
5361                                             int xe_off,
5362                                             int len,
5363                                             struct ocfs2_xattr_set_ctxt *ctxt)
5364{
5365        int ret, offset;
5366        u64 value_blk;
5367        struct ocfs2_xattr_entry *xe;
5368        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5369        size_t blocksize = inode->i_sb->s_blocksize;
5370        struct ocfs2_xattr_value_buf vb = {
5371                .vb_access = ocfs2_journal_access,
5372        };
5373
5374        xe = &xh->xh_entries[xe_off];
5375
5376        BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5377
5378        offset = le16_to_cpu(xe->xe_name_offset) +
5379                 OCFS2_XATTR_SIZE(xe->xe_name_len);
5380
5381        value_blk = offset / blocksize;
5382
5383        /* We don't allow ocfs2_xattr_value to be stored in different block. */
5384        BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5385
5386        vb.vb_bh = bucket->bu_bhs[value_blk];
5387        BUG_ON(!vb.vb_bh);
5388
5389        vb.vb_xv = (struct ocfs2_xattr_value_root *)
5390                (vb.vb_bh->b_data + offset % blocksize);
5391
5392        /*
5393         * From here on out we have to dirty the bucket.  The generic
5394         * value calls only modify one of the bucket's bhs, but we need
5395         * to send the bucket at once.  So if they error, they *could* have
5396         * modified something.  We have to assume they did, and dirty
5397         * the whole bucket.  This leaves us in a consistent state.
5398         */
5399        trace_ocfs2_xattr_bucket_value_truncate(
5400                        (unsigned long long)bucket_blkno(bucket), xe_off, len);
5401        ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5402        if (ret) {
5403                mlog_errno(ret);
5404                goto out;
5405        }
5406
5407        ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5408                                                OCFS2_JOURNAL_ACCESS_WRITE);
5409        if (ret) {
5410                mlog_errno(ret);
5411                goto out;
5412        }
5413
5414        xe->xe_value_size = cpu_to_le64(len);
5415
5416        ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5417
5418out:
5419        return ret;
5420}
5421
5422static int ocfs2_rm_xattr_cluster(struct inode *inode,
5423                                  struct buffer_head *root_bh,
5424                                  u64 blkno,
5425                                  u32 cpos,
5426                                  u32 len,
5427                                  void *para)
5428{
5429        int ret;
5430        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5431        struct inode *tl_inode = osb->osb_tl_inode;
5432        handle_t *handle;
5433        struct ocfs2_xattr_block *xb =
5434                        (struct ocfs2_xattr_block *)root_bh->b_data;
5435        struct ocfs2_alloc_context *meta_ac = NULL;
5436        struct ocfs2_cached_dealloc_ctxt dealloc;
5437        struct ocfs2_extent_tree et;
5438
5439        ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5440                                          ocfs2_delete_xattr_in_bucket, para);
5441        if (ret) {
5442                mlog_errno(ret);
5443                return ret;
5444        }
5445
5446        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5447
5448        ocfs2_init_dealloc_ctxt(&dealloc);
5449
5450        trace_ocfs2_rm_xattr_cluster(
5451                        (unsigned long long)OCFS2_I(inode)->ip_blkno,
5452                        (unsigned long long)blkno, cpos, len);
5453
5454        ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5455                                               len);
5456
5457        ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5458        if (ret) {
5459                mlog_errno(ret);
5460                return ret;
5461        }
5462
5463        inode_lock(tl_inode);
5464
5465        if (ocfs2_truncate_log_needs_flush(osb)) {
5466                ret = __ocfs2_flush_truncate_log(osb);
5467                if (ret < 0) {
5468                        mlog_errno(ret);
5469                        goto out;
5470                }
5471        }
5472
5473        handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5474        if (IS_ERR(handle)) {
5475                ret = -ENOMEM;
5476                mlog_errno(ret);
5477                goto out;
5478        }
5479
5480        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5481                                      OCFS2_JOURNAL_ACCESS_WRITE);
5482        if (ret) {
5483                mlog_errno(ret);
5484                goto out_commit;
5485        }
5486
5487        ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5488                                  &dealloc);
5489        if (ret) {
5490                mlog_errno(ret);
5491                goto out_commit;
5492        }
5493
5494        le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5495        ocfs2_journal_dirty(handle, root_bh);
5496
5497        ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5498        if (ret)
5499                mlog_errno(ret);
5500        ocfs2_update_inode_fsync_trans(handle, inode, 0);
5501
5502out_commit:
5503        ocfs2_commit_trans(osb, handle);
5504out:
5505        ocfs2_schedule_truncate_log_flush(osb, 1);
5506
5507        inode_unlock(tl_inode);
5508
5509        if (meta_ac)
5510                ocfs2_free_alloc_context(meta_ac);
5511
5512        ocfs2_run_deallocs(osb, &dealloc);
5513
5514        return ret;
5515}
5516
5517/*
5518 * check whether the xattr bucket is filled up with the same hash value.
5519 * If we want to insert the xattr with the same hash, return -ENOSPC.
5520 * If we want to insert a xattr with different hash value, go ahead
5521 * and ocfs2_divide_xattr_bucket will handle this.
5522 */
5523static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5524                                              struct ocfs2_xattr_bucket *bucket,
5525                                              const char *name)
5526{
5527        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5528        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5529
5530        if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5531                return 0;
5532
5533        if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5534            xh->xh_entries[0].xe_name_hash) {
5535                mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5536                     "hash = %u\n",
5537                     (unsigned long long)bucket_blkno(bucket),
5538                     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5539                return -ENOSPC;
5540        }
5541
5542        return 0;
5543}
5544
5545/*
5546 * Try to set the entry in the current bucket.  If we fail, the caller
5547 * will handle getting us another bucket.
5548 */
5549static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5550                                        struct ocfs2_xattr_info *xi,
5551                                        struct ocfs2_xattr_search *xs,
5552                                        struct ocfs2_xattr_set_ctxt *ctxt)
5553{
5554        int ret;
5555        struct ocfs2_xa_loc loc;
5556
5557        trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
5558
5559        ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5560                                       xs->not_found ? NULL : xs->here);
5561        ret = ocfs2_xa_set(&loc, xi, ctxt);
5562        if (!ret) {
5563                xs->here = loc.xl_entry;
5564                goto out;
5565        }
5566        if (ret != -ENOSPC) {
5567                mlog_errno(ret);
5568                goto out;
5569        }
5570
5571        /* Ok, we need space.  Let's try defragmenting the bucket. */
5572        ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5573                                        xs->bucket);
5574        if (ret) {
5575                mlog_errno(ret);
5576                goto out;
5577        }
5578
5579        ret = ocfs2_xa_set(&loc, xi, ctxt);
5580        if (!ret) {
5581                xs->here = loc.xl_entry;
5582                goto out;
5583        }
5584        if (ret != -ENOSPC)
5585                mlog_errno(ret);
5586
5587
5588out:
5589        return ret;
5590}
5591
5592static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5593                                             struct ocfs2_xattr_info *xi,
5594                                             struct ocfs2_xattr_search *xs,
5595                                             struct ocfs2_xattr_set_ctxt *ctxt)
5596{
5597        int ret;
5598
5599        trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
5600
5601        ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5602        if (!ret)
5603                goto out;
5604        if (ret != -ENOSPC) {
5605                mlog_errno(ret);
5606                goto out;
5607        }
5608
5609        /* Ack, need more space.  Let's try to get another bucket! */
5610
5611        /*
5612         * We do not allow for overlapping ranges between buckets. And
5613         * the maximum number of collisions we will allow for then is
5614         * one bucket's worth, so check it here whether we need to
5615         * add a new bucket for the insert.
5616         */
5617        ret = ocfs2_check_xattr_bucket_collision(inode,
5618                                                 xs->bucket,
5619                                                 xi->xi_name);
5620        if (ret) {
5621                mlog_errno(ret);
5622                goto out;
5623        }
5624
5625        ret = ocfs2_add_new_xattr_bucket(inode,
5626                                         xs->xattr_bh,
5627                                         xs->bucket,
5628                                         ctxt);
5629        if (ret) {
5630                mlog_errno(ret);
5631                goto out;
5632        }
5633
5634        /*
5635         * ocfs2_add_new_xattr_bucket() will have updated
5636         * xs->bucket if it moved, but it will not have updated
5637         * any of the other search fields.  Thus, we drop it and
5638         * re-search.  Everything should be cached, so it'll be
5639         * quick.
5640         */
5641        ocfs2_xattr_bucket_relse(xs->bucket);
5642        ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5643                                           xi->xi_name_index,
5644                                           xi->xi_name, xs);
5645        if (ret && ret != -ENODATA)
5646                goto out;
5647        xs->not_found = ret;
5648
5649        /* Ok, we have a new bucket, let's try again */
5650        ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5651        if (ret && (ret != -ENOSPC))
5652                mlog_errno(ret);
5653
5654out:
5655        return ret;
5656}
5657
5658static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5659                                        struct ocfs2_xattr_bucket *bucket,
5660                                        void *para)
5661{
5662        int ret = 0, ref_credits;
5663        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5664        u16 i;
5665        struct ocfs2_xattr_entry *xe;
5666        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5667        struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5668        int credits = ocfs2_remove_extent_credits(osb->sb) +
5669                ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5670        struct ocfs2_xattr_value_root *xv;
5671        struct ocfs2_rm_xattr_bucket_para *args =
5672                        (struct ocfs2_rm_xattr_bucket_para *)para;
5673
5674        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5675
5676        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5677                xe = &xh->xh_entries[i];
5678                if (ocfs2_xattr_is_local(xe))
5679                        continue;
5680
5681                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5682                                                      i, &xv, NULL);
5683                if (ret) {
5684                        mlog_errno(ret);
5685                        break;
5686                }
5687
5688                ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5689                                                         args->ref_ci,
5690                                                         args->ref_root_bh,
5691                                                         &ctxt.meta_ac,
5692                                                         &ref_credits);
5693
5694                ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5695                if (IS_ERR(ctxt.handle)) {
5696                        ret = PTR_ERR(ctxt.handle);
5697                        mlog_errno(ret);
5698                        break;
5699                }
5700
5701                ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5702                                                        i, 0, &ctxt);
5703
5704                ocfs2_commit_trans(osb, ctxt.handle);
5705                if (ctxt.meta_ac) {
5706                        ocfs2_free_alloc_context(ctxt.meta_ac);
5707                        ctxt.meta_ac = NULL;
5708                }
5709                if (ret) {
5710                        mlog_errno(ret);
5711                        break;
5712                }
5713        }
5714
5715        if (ctxt.meta_ac)
5716                ocfs2_free_alloc_context(ctxt.meta_ac);
5717        ocfs2_schedule_truncate_log_flush(osb, 1);
5718        ocfs2_run_deallocs(osb, &ctxt.dealloc);
5719        return ret;
5720}
5721
5722/*
5723 * Whenever we modify a xattr value root in the bucket(e.g, CoW
5724 * or change the extent record flag), we need to recalculate
5725 * the metaecc for the whole bucket. So it is done here.
5726 *
5727 * Note:
5728 * We have to give the extra credits for the caller.
5729 */
5730static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5731                                            handle_t *handle,
5732                                            void *para)
5733{
5734        int ret;
5735        struct ocfs2_xattr_bucket *bucket =
5736                        (struct ocfs2_xattr_bucket *)para;
5737
5738        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5739                                                OCFS2_JOURNAL_ACCESS_WRITE);
5740        if (ret) {
5741                mlog_errno(ret);
5742                return ret;
5743        }
5744
5745        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5746
5747        return 0;
5748}
5749
5750/*
5751 * Special action we need if the xattr value is refcounted.
5752 *
5753 * 1. If the xattr is refcounted, lock the tree.
5754 * 2. CoW the xattr if we are setting the new value and the value
5755 *    will be stored outside.
5756 * 3. In other case, decrease_refcount will work for us, so just
5757 *    lock the refcount tree, calculate the meta and credits is OK.
5758 *
5759 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5760 * currently CoW is a completed transaction, while this function
5761 * will also lock the allocators and let us deadlock. So we will
5762 * CoW the whole xattr value.
5763 */
5764static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5765                                        struct ocfs2_dinode *di,
5766                                        struct ocfs2_xattr_info *xi,
5767                                        struct ocfs2_xattr_search *xis,
5768                                        struct ocfs2_xattr_search *xbs,
5769                                        struct ocfs2_refcount_tree **ref_tree,
5770                                        int *meta_add,
5771                                        int *credits)
5772{
5773        int ret = 0;
5774        struct ocfs2_xattr_block *xb;
5775        struct ocfs2_xattr_entry *xe;
5776        char *base;
5777        u32 p_cluster, num_clusters;
5778        unsigned int ext_flags;
5779        int name_offset, name_len;
5780        struct ocfs2_xattr_value_buf vb;
5781        struct ocfs2_xattr_bucket *bucket = NULL;
5782        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5783        struct ocfs2_post_refcount refcount;
5784        struct ocfs2_post_refcount *p = NULL;
5785        struct buffer_head *ref_root_bh = NULL;
5786
5787        if (!xis->not_found) {
5788                xe = xis->here;
5789                name_offset = le16_to_cpu(xe->xe_name_offset);
5790                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5791                base = xis->base;
5792                vb.vb_bh = xis->inode_bh;
5793                vb.vb_access = ocfs2_journal_access_di;
5794        } else {
5795                int i, block_off = 0;
5796                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5797                xe = xbs->here;
5798                name_offset = le16_to_cpu(xe->xe_name_offset);
5799                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5800                i = xbs->here - xbs->header->xh_entries;
5801
5802                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5803                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5804                                                        bucket_xh(xbs->bucket),
5805                                                        i, &block_off,
5806                                                        &name_offset);
5807                        if (ret) {
5808                                mlog_errno(ret);
5809                                goto out;
5810                        }
5811                        base = bucket_block(xbs->bucket, block_off);
5812                        vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5813                        vb.vb_access = ocfs2_journal_access;
5814
5815                        if (ocfs2_meta_ecc(osb)) {
5816                                /*create parameters for ocfs2_post_refcount. */
5817                                bucket = xbs->bucket;
5818                                refcount.credits = bucket->bu_blocks;
5819                                refcount.para = bucket;
5820                                refcount.func =
5821                                        ocfs2_xattr_bucket_post_refcount;
5822                                p = &refcount;
5823                        }
5824                } else {
5825                        base = xbs->base;
5826                        vb.vb_bh = xbs->xattr_bh;
5827                        vb.vb_access = ocfs2_journal_access_xb;
5828                }
5829        }
5830
5831        if (ocfs2_xattr_is_local(xe))
5832                goto out;
5833
5834        vb.vb_xv = (struct ocfs2_xattr_value_root *)
5835                                (base + name_offset + name_len);
5836
5837        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5838                                       &num_clusters, &vb.vb_xv->xr_list,
5839                                       &ext_flags);
5840        if (ret) {
5841                mlog_errno(ret);
5842                goto out;
5843        }
5844
5845        /*
5846         * We just need to check the 1st extent record, since we always
5847         * CoW the whole xattr. So there shouldn't be a xattr with
5848         * some REFCOUNT extent recs after the 1st one.
5849         */
5850        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5851                goto out;
5852
5853        ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5854                                       1, ref_tree, &ref_root_bh);
5855        if (ret) {
5856                mlog_errno(ret);
5857                goto out;
5858        }
5859
5860        /*
5861         * If we are deleting the xattr or the new size will be stored inside,
5862         * cool, leave it there, the xattr truncate process will remove them
5863         * for us(it still needs the refcount tree lock and the meta, credits).
5864         * And the worse case is that every cluster truncate will split the
5865         * refcount tree, and make the original extent become 3. So we will need
5866         * 2 * cluster more extent recs at most.
5867         */
5868        if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5869
5870                ret = ocfs2_refcounted_xattr_delete_need(inode,
5871                                                         &(*ref_tree)->rf_ci,
5872                                                         ref_root_bh, vb.vb_xv,
5873                                                         meta_add, credits);
5874                if (ret)
5875                        mlog_errno(ret);
5876                goto out;
5877        }
5878
5879        ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5880                                       *ref_tree, ref_root_bh, 0,
5881                                       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5882        if (ret)
5883                mlog_errno(ret);
5884
5885out:
5886        brelse(ref_root_bh);
5887        return ret;
5888}
5889
5890/*
5891 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5892 * The physical clusters will be added to refcount tree.
5893 */
5894static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5895                                struct ocfs2_xattr_value_root *xv,
5896                                struct ocfs2_extent_tree *value_et,
5897                                struct ocfs2_caching_info *ref_ci,
5898                                struct buffer_head *ref_root_bh,
5899                                struct ocfs2_cached_dealloc_ctxt *dealloc,
5900                                struct ocfs2_post_refcount *refcount)
5901{
5902        int ret = 0;
5903        u32 clusters = le32_to_cpu(xv->xr_clusters);
5904        u32 cpos, p_cluster, num_clusters;
5905        struct ocfs2_extent_list *el = &xv->xr_list;
5906        unsigned int ext_flags;
5907
5908        cpos = 0;
5909        while (cpos < clusters) {
5910                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5911                                               &num_clusters, el, &ext_flags);
5912                if (ret) {
5913                        mlog_errno(ret);
5914                        break;
5915                }
5916
5917                cpos += num_clusters;
5918                if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5919                        continue;
5920
5921                BUG_ON(!p_cluster);
5922
5923                ret = ocfs2_add_refcount_flag(inode, value_et,
5924                                              ref_ci, ref_root_bh,
5925                                              cpos - num_clusters,
5926                                              p_cluster, num_clusters,
5927                                              dealloc, refcount);
5928                if (ret) {
5929                        mlog_errno(ret);
5930                        break;
5931                }
5932        }
5933
5934        return ret;
5935}
5936
5937/*
5938 * Given a normal ocfs2_xattr_header, refcount all the entries which
5939 * have value stored outside.
5940 * Used for xattrs stored in inode and ocfs2_xattr_block.
5941 */
5942static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5943                                struct ocfs2_xattr_value_buf *vb,
5944                                struct ocfs2_xattr_header *header,
5945                                struct ocfs2_caching_info *ref_ci,
5946                                struct buffer_head *ref_root_bh,
5947                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5948{
5949
5950        struct ocfs2_xattr_entry *xe;
5951        struct ocfs2_xattr_value_root *xv;
5952        struct ocfs2_extent_tree et;
5953        int i, ret = 0;
5954
5955        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5956                xe = &header->xh_entries[i];
5957
5958                if (ocfs2_xattr_is_local(xe))
5959                        continue;
5960
5961                xv = (struct ocfs2_xattr_value_root *)((void *)header +
5962                        le16_to_cpu(xe->xe_name_offset) +
5963                        OCFS2_XATTR_SIZE(xe->xe_name_len));
5964
5965                vb->vb_xv = xv;
5966                ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5967
5968                ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5969                                                        ref_ci, ref_root_bh,
5970                                                        dealloc, NULL);
5971                if (ret) {
5972                        mlog_errno(ret);
5973                        break;
5974                }
5975        }
5976
5977        return ret;
5978}
5979
5980static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5981                                struct buffer_head *fe_bh,
5982                                struct ocfs2_caching_info *ref_ci,
5983                                struct buffer_head *ref_root_bh,
5984                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5985{
5986        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5987        struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5988                                (fe_bh->b_data + inode->i_sb->s_blocksize -
5989                                le16_to_cpu(di->i_xattr_inline_size));
5990        struct ocfs2_xattr_value_buf vb = {
5991                .vb_bh = fe_bh,
5992                .vb_access = ocfs2_journal_access_di,
5993        };
5994
5995        return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5996                                                  ref_ci, ref_root_bh, dealloc);
5997}
5998
/*
 * Arguments handed as the opaque "para" pointer through the xattr tree
 * iteration down to ocfs2_xattr_bucket_value_refcount(), which forwards
 * each member to ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	struct ocfs2_caching_info *ref_ci;	/* passed on as ref_ci */
	struct buffer_head *ref_root_bh;	/* passed on as ref_root_bh */
	struct ocfs2_cached_dealloc_ctxt *dealloc; /* passed on as dealloc */
};
6004
6005static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
6006                                           struct ocfs2_xattr_bucket *bucket,
6007                                           int offset,
6008                                           struct ocfs2_xattr_value_root **xv,
6009                                           struct buffer_head **bh)
6010{
6011        int ret, block_off, name_offset;
6012        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
6013        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6014        void *base;
6015
6016        ret = ocfs2_xattr_bucket_get_name_value(sb,
6017                                                bucket_xh(bucket),
6018                                                offset,
6019                                                &block_off,
6020                                                &name_offset);
6021        if (ret) {
6022                mlog_errno(ret);
6023                goto out;
6024        }
6025
6026        base = bucket_block(bucket, block_off);
6027
6028        *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6029                         OCFS2_XATTR_SIZE(xe->xe_name_len));
6030
6031        if (bh)
6032                *bh = bucket->bu_bhs[block_off];
6033out:
6034        return ret;
6035}
6036
6037/*
6038 * For a given xattr bucket, refcount all the entries which
6039 * have value stored outside.
6040 */
6041static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6042                                             struct ocfs2_xattr_bucket *bucket,
6043                                             void *para)
6044{
6045        int i, ret = 0;
6046        struct ocfs2_extent_tree et;
6047        struct ocfs2_xattr_tree_value_refcount_para *ref =
6048                        (struct ocfs2_xattr_tree_value_refcount_para *)para;
6049        struct ocfs2_xattr_header *xh =
6050                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6051        struct ocfs2_xattr_entry *xe;
6052        struct ocfs2_xattr_value_buf vb = {
6053                .vb_access = ocfs2_journal_access,
6054        };
6055        struct ocfs2_post_refcount refcount = {
6056                .credits = bucket->bu_blocks,
6057                .para = bucket,
6058                .func = ocfs2_xattr_bucket_post_refcount,
6059        };
6060        struct ocfs2_post_refcount *p = NULL;
6061
6062        /* We only need post_refcount if we support metaecc. */
6063        if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6064                p = &refcount;
6065
6066        trace_ocfs2_xattr_bucket_value_refcount(
6067                                (unsigned long long)bucket_blkno(bucket),
6068                                le16_to_cpu(xh->xh_count));
6069        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6070                xe = &xh->xh_entries[i];
6071
6072                if (ocfs2_xattr_is_local(xe))
6073                        continue;
6074
6075                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6076                                                      &vb.vb_xv, &vb.vb_bh);
6077                if (ret) {
6078                        mlog_errno(ret);
6079                        break;
6080                }
6081
6082                ocfs2_init_xattr_value_extent_tree(&et,
6083                                                   INODE_CACHE(inode), &vb);
6084
6085                ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6086                                                        &et, ref->ref_ci,
6087                                                        ref->ref_root_bh,
6088                                                        ref->dealloc, p);
6089                if (ret) {
6090                        mlog_errno(ret);
6091                        break;
6092                }
6093        }
6094
6095        return ret;
6096
6097}
6098
6099static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6100                                     struct buffer_head *root_bh,
6101                                     u64 blkno, u32 cpos, u32 len, void *para)
6102{
6103        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6104                                           ocfs2_xattr_bucket_value_refcount,
6105                                           para);
6106}
6107
6108static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6109                                struct buffer_head *blk_bh,
6110                                struct ocfs2_caching_info *ref_ci,
6111                                struct buffer_head *ref_root_bh,
6112                                struct ocfs2_cached_dealloc_ctxt *dealloc)
6113{
6114        int ret = 0;
6115        struct ocfs2_xattr_block *xb =
6116                                (struct ocfs2_xattr_block *)blk_bh->b_data;
6117
6118        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6119                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6120                struct ocfs2_xattr_value_buf vb = {
6121                        .vb_bh = blk_bh,
6122                        .vb_access = ocfs2_journal_access_xb,
6123                };
6124
6125                ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6126                                                         ref_ci, ref_root_bh,
6127                                                         dealloc);
6128        } else {
6129                struct ocfs2_xattr_tree_value_refcount_para para = {
6130                        .ref_ci = ref_ci,
6131                        .ref_root_bh = ref_root_bh,
6132                        .dealloc = dealloc,
6133                };
6134
6135                ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6136                                                ocfs2_refcount_xattr_tree_rec,
6137                                                &para);
6138        }
6139
6140        return ret;
6141}
6142
6143int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6144                                     struct buffer_head *fe_bh,
6145                                     struct ocfs2_caching_info *ref_ci,
6146                                     struct buffer_head *ref_root_bh,
6147                                     struct ocfs2_cached_dealloc_ctxt *dealloc)
6148{
6149        int ret = 0;
6150        struct ocfs2_inode_info *oi = OCFS2_I(inode);
6151        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6152        struct buffer_head *blk_bh = NULL;
6153
6154        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6155                ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6156                                                         ref_ci, ref_root_bh,
6157                                                         dealloc);
6158                if (ret) {
6159                        mlog_errno(ret);
6160                        goto out;
6161                }
6162        }
6163
6164        if (!di->i_xattr_loc)
6165                goto out;
6166
6167        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6168                                     &blk_bh);
6169        if (ret < 0) {
6170                mlog_errno(ret);
6171                goto out;
6172        }
6173
6174        ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6175                                                ref_root_bh, dealloc);
6176        if (ret)
6177                mlog_errno(ret);
6178
6179        brelse(blk_bh);
6180out:
6181
6182        return ret;
6183}
6184
/*
 * Filter callback used while reflinking xattrs: a zero return means the
 * entry must not be carried over to the new inode, non-zero means it is
 * reflinked.
 */
typedef int should_xattr_reflinked(struct ocfs2_xattr_entry *xe);

/*
 * Everything an xattr reflink operation needs to carry around.
 * old_bh and new_bh are the inode buffer_heads of the old and the new inode.
 */
struct ocfs2_xattr_reflink {
        /* source and destination inodes */
        struct inode *old_inode, *new_inode;
        /* dinode buffers of old_inode and new_inode, in that order */
        struct buffer_head *old_bh, *new_bh;
        /* refcount tree handles passed on to the refcount helpers */
        struct ocfs2_caching_info *ref_ci;
        struct buffer_head *ref_root_bh;
        struct ocfs2_cached_dealloc_ctxt *dealloc;
        /* optional per-entry filter; when NULL every entry is reflinked */
        should_xattr_reflinked *xattr_reflinked;
};
6200
/*
 * Callback type that, for the entry at index @offset of xattr header @xh,
 * hands back the matching value root in *@xv and the buffer_head holding it
 * in *@ret_bh (callers in this file pass a NULL @ret_bh when they do not
 * need the buffer).
 * xattrs stored in the inode, in an xattr block and in an xattr tree each
 * have their own implementation; @para is implementation-private data.
 */
typedef int get_xattr_value_root(struct super_block *sb,
                                 struct buffer_head *bh,
                                 struct ocfs2_xattr_header *xh,
                                 int offset,
                                 struct ocfs2_xattr_value_root **xv,
                                 struct buffer_head **ret_bh,
                                 void *para);
6213
6214/*
6215 * Calculate all the xattr value root metadata stored in this xattr header and
6216 * credits we need if we create them from the scratch.
6217 * We use get_xattr_value_root so that all types of xattr container can use it.
6218 */
6219static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6220                                             struct buffer_head *bh,
6221                                             struct ocfs2_xattr_header *xh,
6222                                             int *metas, int *credits,
6223                                             int *num_recs,
6224                                             get_xattr_value_root *func,
6225                                             void *para)
6226{
6227        int i, ret = 0;
6228        struct ocfs2_xattr_value_root *xv;
6229        struct ocfs2_xattr_entry *xe;
6230
6231        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6232                xe = &xh->xh_entries[i];
6233                if (ocfs2_xattr_is_local(xe))
6234                        continue;
6235
6236                ret = func(sb, bh, xh, i, &xv, NULL, para);
6237                if (ret) {
6238                        mlog_errno(ret);
6239                        break;
6240                }
6241
6242                *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6243                          le16_to_cpu(xv->xr_list.l_next_free_rec);
6244
6245                *credits += ocfs2_calc_extend_credits(sb,
6246                                                &def_xv.xv.xr_list);
6247
6248                /*
6249                 * If the value is a tree with depth > 1, We don't go deep
6250                 * to the extent block, so just calculate a maximum record num.
6251                 */
6252                if (!xv->xr_list.l_tree_depth)
6253                        *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6254                else
6255                        *num_recs += ocfs2_clusters_for_bytes(sb,
6256                                                              XATTR_SIZE_MAX);
6257        }
6258
6259        return ret;
6260}
6261
6262/* Used by xattr inode and block to return the right xv and buffer_head. */
6263static int ocfs2_get_xattr_value_root(struct super_block *sb,
6264                                      struct buffer_head *bh,
6265                                      struct ocfs2_xattr_header *xh,
6266                                      int offset,
6267                                      struct ocfs2_xattr_value_root **xv,
6268                                      struct buffer_head **ret_bh,
6269                                      void *para)
6270{
6271        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6272
6273        *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6274                le16_to_cpu(xe->xe_name_offset) +
6275                OCFS2_XATTR_SIZE(xe->xe_name_len));
6276
6277        if (ret_bh)
6278                *ret_bh = bh;
6279
6280        return 0;
6281}
6282
6283/*
6284 * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6285 * It is only used for inline xattr and xattr block.
6286 */
6287static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6288                                        struct ocfs2_xattr_header *xh,
6289                                        struct buffer_head *ref_root_bh,
6290                                        int *credits,
6291                                        struct ocfs2_alloc_context **meta_ac)
6292{
6293        int ret, meta_add = 0, num_recs = 0;
6294        struct ocfs2_refcount_block *rb =
6295                        (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6296
6297        *credits = 0;
6298
6299        ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6300                                                &meta_add, credits, &num_recs,
6301                                                ocfs2_get_xattr_value_root,
6302                                                NULL);
6303        if (ret) {
6304                mlog_errno(ret);
6305                goto out;
6306        }
6307
6308        /*
6309         * We need to add/modify num_recs in refcount tree, so just calculate
6310         * an approximate number we need for refcount tree change.
6311         * Sometimes we need to split the tree, and after split,  half recs
6312         * will be moved to the new block, and a new block can only provide
6313         * half number of recs. So we multiple new blocks by 2.
6314         */
6315        num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6316        meta_add += num_recs;
6317        *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6318        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6319                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6320                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6321        else
6322                *credits += 1;
6323
6324        ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6325        if (ret)
6326                mlog_errno(ret);
6327
6328out:
6329        return ret;
6330}
6331
6332/*
6333 * Given a xattr header, reflink all the xattrs in this container.
6334 * It can be used for inode, block and bucket.
6335 *
6336 * NOTE:
6337 * Before we call this function, the caller has memcpy the xattr in
6338 * old_xh to the new_xh.
6339 *
6340 * If args.xattr_reflinked is set, call it to decide whether the xe should
6341 * be reflinked or not. If not, remove it from the new xattr header.
6342 */
6343static int ocfs2_reflink_xattr_header(handle_t *handle,
6344                                      struct ocfs2_xattr_reflink *args,
6345                                      struct buffer_head *old_bh,
6346                                      struct ocfs2_xattr_header *xh,
6347                                      struct buffer_head *new_bh,
6348                                      struct ocfs2_xattr_header *new_xh,
6349                                      struct ocfs2_xattr_value_buf *vb,
6350                                      struct ocfs2_alloc_context *meta_ac,
6351                                      get_xattr_value_root *func,
6352                                      void *para)
6353{
6354        int ret = 0, i, j;
6355        struct super_block *sb = args->old_inode->i_sb;
6356        struct buffer_head *value_bh;
6357        struct ocfs2_xattr_entry *xe, *last;
6358        struct ocfs2_xattr_value_root *xv, *new_xv;
6359        struct ocfs2_extent_tree data_et;
6360        u32 clusters, cpos, p_cluster, num_clusters;
6361        unsigned int ext_flags = 0;
6362
6363        trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
6364                                         le16_to_cpu(xh->xh_count));
6365
6366        last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6367        for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6368                xe = &xh->xh_entries[i];
6369
6370                if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6371                        xe = &new_xh->xh_entries[j];
6372
6373                        le16_add_cpu(&new_xh->xh_count, -1);
6374                        if (new_xh->xh_count) {
6375                                memmove(xe, xe + 1,
6376                                        (void *)last - (void *)xe);
6377                                memset(last, 0,
6378                                       sizeof(struct ocfs2_xattr_entry));
6379                        }
6380
6381                        /*
6382                         * We don't want j to increase in the next round since
6383                         * it is already moved ahead.
6384                         */
6385                        j--;
6386                        continue;
6387                }
6388
6389                if (ocfs2_xattr_is_local(xe))
6390                        continue;
6391
6392                ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6393                if (ret) {
6394                        mlog_errno(ret);
6395                        break;
6396                }
6397
6398                ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6399                if (ret) {
6400                        mlog_errno(ret);
6401                        break;
6402                }
6403
6404                /*
6405                 * For the xattr which has l_tree_depth = 0, all the extent
6406                 * recs have already be copied to the new xh with the
6407                 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6408                 * increase the refount count int the refcount tree.
6409                 *
6410                 * For the xattr which has l_tree_depth > 0, we need
6411                 * to initialize it to the empty default value root,
6412                 * and then insert the extents one by one.
6413                 */
6414                if (xv->xr_list.l_tree_depth) {
6415                        memcpy(new_xv, &def_xv, sizeof(def_xv));
6416                        vb->vb_xv = new_xv;
6417                        vb->vb_bh = value_bh;
6418                        ocfs2_init_xattr_value_extent_tree(&data_et,
6419                                        INODE_CACHE(args->new_inode), vb);
6420                }
6421
6422                clusters = le32_to_cpu(xv->xr_clusters);
6423                cpos = 0;
6424                while (cpos < clusters) {
6425                        ret = ocfs2_xattr_get_clusters(args->old_inode,
6426                                                       cpos,
6427                                                       &p_cluster,
6428                                                       &num_clusters,
6429                                                       &xv->xr_list,
6430                                                       &ext_flags);
6431                        if (ret) {
6432                                mlog_errno(ret);
6433                                goto out;
6434                        }
6435
6436                        BUG_ON(!p_cluster);
6437
6438                        if (xv->xr_list.l_tree_depth) {
6439                                ret = ocfs2_insert_extent(handle,
6440                                                &data_et, cpos,
6441                                                ocfs2_clusters_to_blocks(
6442                                                        args->old_inode->i_sb,
6443                                                        p_cluster),
6444                                                num_clusters, ext_flags,
6445                                                meta_ac);
6446                                if (ret) {
6447                                        mlog_errno(ret);
6448                                        goto out;
6449                                }
6450                        }
6451
6452                        ret = ocfs2_increase_refcount(handle, args->ref_ci,
6453                                                      args->ref_root_bh,
6454                                                      p_cluster, num_clusters,
6455                                                      meta_ac, args->dealloc);
6456                        if (ret) {
6457                                mlog_errno(ret);
6458                                goto out;
6459                        }
6460
6461                        cpos += num_clusters;
6462                }
6463        }
6464
6465out:
6466        return ret;
6467}
6468
6469static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6470{
6471        int ret = 0, credits = 0;
6472        handle_t *handle;
6473        struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6474        struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6475        int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6476        int header_off = osb->sb->s_blocksize - inline_size;
6477        struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6478                                        (args->old_bh->b_data + header_off);
6479        struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6480                                        (args->new_bh->b_data + header_off);
6481        struct ocfs2_alloc_context *meta_ac = NULL;
6482        struct ocfs2_inode_info *new_oi;
6483        struct ocfs2_dinode *new_di;
6484        struct ocfs2_xattr_value_buf vb = {
6485                .vb_bh = args->new_bh,
6486                .vb_access = ocfs2_journal_access_di,
6487        };
6488
6489        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6490                                                  &credits, &meta_ac);
6491        if (ret) {
6492                mlog_errno(ret);
6493                goto out;
6494        }
6495
6496        handle = ocfs2_start_trans(osb, credits);
6497        if (IS_ERR(handle)) {
6498                ret = PTR_ERR(handle);
6499                mlog_errno(ret);
6500                goto out;
6501        }
6502
6503        ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6504                                      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6505        if (ret) {
6506                mlog_errno(ret);
6507                goto out_commit;
6508        }
6509
6510        memcpy(args->new_bh->b_data + header_off,
6511               args->old_bh->b_data + header_off, inline_size);
6512
6513        new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6514        new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6515
6516        ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6517                                         args->new_bh, new_xh, &vb, meta_ac,
6518                                         ocfs2_get_xattr_value_root, NULL);
6519        if (ret) {
6520                mlog_errno(ret);
6521                goto out_commit;
6522        }
6523
6524        new_oi = OCFS2_I(args->new_inode);
6525        /*
6526         * Adjust extent record count to reserve space for extended attribute.
6527         * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
6528         */
6529        if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
6530            !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
6531                struct ocfs2_extent_list *el = &new_di->id2.i_list;
6532                le16_add_cpu(&el->l_count, -(inline_size /
6533                                        sizeof(struct ocfs2_extent_rec)));
6534        }
6535        spin_lock(&new_oi->ip_lock);
6536        new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6537        new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6538        spin_unlock(&new_oi->ip_lock);
6539
6540        ocfs2_journal_dirty(handle, args->new_bh);
6541
6542out_commit:
6543        ocfs2_commit_trans(osb, handle);
6544
6545out:
6546        if (meta_ac)
6547                ocfs2_free_alloc_context(meta_ac);
6548        return ret;
6549}
6550
6551static int ocfs2_create_empty_xattr_block(struct inode *inode,
6552                                          struct buffer_head *fe_bh,
6553                                          struct buffer_head **ret_bh,
6554                                          int indexed)
6555{
6556        int ret;
6557        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6558        struct ocfs2_xattr_set_ctxt ctxt;
6559
6560        memset(&ctxt, 0, sizeof(ctxt));
6561        ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6562        if (ret < 0) {
6563                mlog_errno(ret);
6564                return ret;
6565        }
6566
6567        ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6568        if (IS_ERR(ctxt.handle)) {
6569                ret = PTR_ERR(ctxt.handle);
6570                mlog_errno(ret);
6571                goto out;
6572        }
6573
6574        trace_ocfs2_create_empty_xattr_block(
6575                                (unsigned long long)fe_bh->b_blocknr, indexed);
6576        ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6577                                       ret_bh);
6578        if (ret)
6579                mlog_errno(ret);
6580
6581        ocfs2_commit_trans(osb, ctxt.handle);
6582out:
6583        ocfs2_free_alloc_context(ctxt.meta_ac);
6584        return ret;
6585}
6586
6587static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6588                                     struct buffer_head *blk_bh,
6589                                     struct buffer_head *new_blk_bh)
6590{
6591        int ret = 0, credits = 0;
6592        handle_t *handle;
6593        struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6594        struct ocfs2_dinode *new_di;
6595        struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6596        int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6597        struct ocfs2_xattr_block *xb =
6598                        (struct ocfs2_xattr_block *)blk_bh->b_data;
6599        struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6600        struct ocfs2_xattr_block *new_xb =
6601                        (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6602        struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6603        struct ocfs2_alloc_context *meta_ac;
6604        struct ocfs2_xattr_value_buf vb = {
6605                .vb_bh = new_blk_bh,
6606                .vb_access = ocfs2_journal_access_xb,
6607        };
6608
6609        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6610                                                  &credits, &meta_ac);
6611        if (ret) {
6612                mlog_errno(ret);
6613                return ret;
6614        }
6615
6616        /* One more credits in case we need to add xattr flags in new inode. */
6617        handle = ocfs2_start_trans(osb, credits + 1);
6618        if (IS_ERR(handle)) {
6619                ret = PTR_ERR(handle);
6620                mlog_errno(ret);
6621                goto out;
6622        }
6623
6624        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6625                ret = ocfs2_journal_access_di(handle,
6626                                              INODE_CACHE(args->new_inode),
6627                                              args->new_bh,
6628                                              OCFS2_JOURNAL_ACCESS_WRITE);
6629                if (ret) {
6630                        mlog_errno(ret);
6631                        goto out_commit;
6632                }
6633        }
6634
6635        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6636                                      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6637        if (ret) {
6638                mlog_errno(ret);
6639                goto out_commit;
6640        }
6641
6642        memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6643               osb->sb->s_blocksize - header_off);
6644
6645        ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6646                                         new_blk_bh, new_xh, &vb, meta_ac,
6647                                         ocfs2_get_xattr_value_root, NULL);
6648        if (ret) {
6649                mlog_errno(ret);
6650                goto out_commit;
6651        }
6652
6653        ocfs2_journal_dirty(handle, new_blk_bh);
6654
6655        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6656                new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6657                spin_lock(&new_oi->ip_lock);
6658                new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6659                new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6660                spin_unlock(&new_oi->ip_lock);
6661
6662                ocfs2_journal_dirty(handle, args->new_bh);
6663        }
6664
6665out_commit:
6666        ocfs2_commit_trans(osb, handle);
6667
6668out:
6669        ocfs2_free_alloc_context(meta_ac);
6670        return ret;
6671}
6672
/*
 * State shared by the helpers that reflink an indexed xattr tree.
 */
struct ocfs2_reflink_xattr_tree_args {
        /* the overall reflink context */
        struct ocfs2_xattr_reflink *reflink;
        /* buffers of the old and the new xattr block, in that order */
        struct buffer_head *old_blk_bh, *new_blk_bh;
        /* bucket currently being read from / written to, in that order */
        struct ocfs2_xattr_bucket *old_bucket, *new_bucket;
};
6680
6681/*
6682 * NOTE:
6683 * We have to handle the case that both old bucket and new bucket
6684 * will call this function to get the right ret_bh.
6685 * So The caller must give us the right bh.
6686 */
6687static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6688                                        struct buffer_head *bh,
6689                                        struct ocfs2_xattr_header *xh,
6690                                        int offset,
6691                                        struct ocfs2_xattr_value_root **xv,
6692                                        struct buffer_head **ret_bh,
6693                                        void *para)
6694{
6695        struct ocfs2_reflink_xattr_tree_args *args =
6696                        (struct ocfs2_reflink_xattr_tree_args *)para;
6697        struct ocfs2_xattr_bucket *bucket;
6698
6699        if (bh == args->old_bucket->bu_bhs[0])
6700                bucket = args->old_bucket;
6701        else
6702                bucket = args->new_bucket;
6703
6704        return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6705                                               xv, ret_bh);
6706}
6707
/*
 * Running totals filled in by ocfs2_calc_value_tree_metas() while the
 * buckets of an xattr extent record are iterated.
 */
struct ocfs2_value_tree_metas {
        int num_metas;  /* metadata blocks counted so far */
        int credits;    /* journal credits counted so far */
        int num_recs;   /* extent records counted so far */
};
6713
/*
 * get_xattr_value_root implementation for a single bucket: @para is the
 * struct ocfs2_xattr_bucket to look in, @bh and @xh are not used.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        return ocfs2_get_xattr_tree_value_root(sb,
                                        (struct ocfs2_xattr_bucket *)para,
                                        offset, xv, ret_bh);
}
6728
6729static int ocfs2_calc_value_tree_metas(struct inode *inode,
6730                                      struct ocfs2_xattr_bucket *bucket,
6731                                      void *para)
6732{
6733        struct ocfs2_value_tree_metas *metas =
6734                        (struct ocfs2_value_tree_metas *)para;
6735        struct ocfs2_xattr_header *xh =
6736                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6737
6738        /* Add the credits for this bucket first. */
6739        metas->credits += bucket->bu_blocks;
6740        return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6741                                        xh, &metas->num_metas,
6742                                        &metas->credits, &metas->num_recs,
6743                                        ocfs2_value_tree_metas_in_bucket,
6744                                        bucket);
6745}
6746
6747/*
6748 * Given a xattr extent rec starting from blkno and having len clusters,
6749 * iterate all the buckets calculate how much metadata we need for reflinking
6750 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6751 */
6752static int ocfs2_lock_reflink_xattr_rec_allocators(
6753                                struct ocfs2_reflink_xattr_tree_args *args,
6754                                struct ocfs2_extent_tree *xt_et,
6755                                u64 blkno, u32 len, int *credits,
6756                                struct ocfs2_alloc_context **meta_ac,
6757                                struct ocfs2_alloc_context **data_ac)
6758{
6759        int ret, num_free_extents;
6760        struct ocfs2_value_tree_metas metas;
6761        struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6762        struct ocfs2_refcount_block *rb;
6763
6764        memset(&metas, 0, sizeof(metas));
6765
6766        ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6767                                          ocfs2_calc_value_tree_metas, &metas);
6768        if (ret) {
6769                mlog_errno(ret);
6770                goto out;
6771        }
6772
6773        *credits = metas.credits;
6774
6775        /*
6776         * Calculate we need for refcount tree change.
6777         *
6778         * We need to add/modify num_recs in refcount tree, so just calculate
6779         * an approximate number we need for refcount tree change.
6780         * Sometimes we need to split the tree, and after split,  half recs
6781         * will be moved to the new block, and a new block can only provide
6782         * half number of recs. So we multiple new blocks by 2.
6783         * In the end, we have to add credits for modifying the already
6784         * existed refcount block.
6785         */
6786        rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6787        metas.num_recs =
6788                (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6789                 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6790        metas.num_metas += metas.num_recs;
6791        *credits += metas.num_recs +
6792                    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6793        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6794                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6795                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6796        else
6797                *credits += 1;
6798
6799        /* count in the xattr tree change. */
6800        num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6801        if (num_free_extents < 0) {
6802                ret = num_free_extents;
6803                mlog_errno(ret);
6804                goto out;
6805        }
6806
6807        if (num_free_extents < len)
6808                metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6809
6810        *credits += ocfs2_calc_extend_credits(osb->sb,
6811                                              xt_et->et_root_el);
6812
6813        if (metas.num_metas) {
6814                ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6815                                                        meta_ac);
6816                if (ret) {
6817                        mlog_errno(ret);
6818                        goto out;
6819                }
6820        }
6821
6822        if (len) {
6823                ret = ocfs2_reserve_clusters(osb, len, data_ac);
6824                if (ret)
6825                        mlog_errno(ret);
6826        }
6827out:
6828        if (ret) {
6829                if (*meta_ac) {
6830                        ocfs2_free_alloc_context(*meta_ac);
6831                        *meta_ac = NULL;
6832                }
6833        }
6834
6835        return ret;
6836}
6837
6838static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6839                                u64 blkno, u64 new_blkno, u32 clusters,
6840                                u32 *cpos, int num_buckets,
6841                                struct ocfs2_alloc_context *meta_ac,
6842                                struct ocfs2_alloc_context *data_ac,
6843                                struct ocfs2_reflink_xattr_tree_args *args)
6844{
6845        int i, j, ret = 0;
6846        struct super_block *sb = args->reflink->old_inode->i_sb;
6847        int bpb = args->old_bucket->bu_blocks;
6848        struct ocfs2_xattr_value_buf vb = {
6849                .vb_access = ocfs2_journal_access,
6850        };
6851
6852        for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6853                ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6854                if (ret) {
6855                        mlog_errno(ret);
6856                        break;
6857                }
6858
6859                ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
6860                if (ret) {
6861                        mlog_errno(ret);
6862                        break;
6863                }
6864
6865                ret = ocfs2_xattr_bucket_journal_access(handle,
6866                                                args->new_bucket,
6867                                                OCFS2_JOURNAL_ACCESS_CREATE);
6868                if (ret) {
6869                        mlog_errno(ret);
6870                        break;
6871                }
6872
6873                for (j = 0; j < bpb; j++)
6874                        memcpy(bucket_block(args->new_bucket, j),
6875                               bucket_block(args->old_bucket, j),
6876                               sb->s_blocksize);
6877
6878                /*
6879                 * Record the start cpos so that we can use it to initialize
6880                 * our xattr tree we also set the xh_num_bucket for the new
6881                 * bucket.
6882                 */
6883                if (i == 0) {
6884                        *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6885                                            xh_entries[0].xe_name_hash);
6886                        bucket_xh(args->new_bucket)->xh_num_buckets =
6887                                cpu_to_le16(num_buckets);
6888                }
6889
6890                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6891
6892                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6893                                        args->old_bucket->bu_bhs[0],
6894                                        bucket_xh(args->old_bucket),
6895                                        args->new_bucket->bu_bhs[0],
6896                                        bucket_xh(args->new_bucket),
6897                                        &vb, meta_ac,
6898                                        ocfs2_get_reflink_xattr_value_root,
6899                                        args);
6900                if (ret) {
6901                        mlog_errno(ret);
6902                        break;
6903                }
6904
6905                /*
6906                 * Re-access and dirty the bucket to calculate metaecc.
6907                 * Because we may extend the transaction in reflink_xattr_header
6908                 * which will let the already accessed block gone.
6909                 */
6910                ret = ocfs2_xattr_bucket_journal_access(handle,
6911                                                args->new_bucket,
6912                                                OCFS2_JOURNAL_ACCESS_WRITE);
6913                if (ret) {
6914                        mlog_errno(ret);
6915                        break;
6916                }
6917
6918                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6919
6920                ocfs2_xattr_bucket_relse(args->old_bucket);
6921                ocfs2_xattr_bucket_relse(args->new_bucket);
6922        }
6923
6924        ocfs2_xattr_bucket_relse(args->old_bucket);
6925        ocfs2_xattr_bucket_relse(args->new_bucket);
6926        return ret;
6927}
6928
6929static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6930                                struct inode *inode,
6931                                struct ocfs2_reflink_xattr_tree_args *args,
6932                                struct ocfs2_extent_tree *et,
6933                                struct ocfs2_alloc_context *meta_ac,
6934                                struct ocfs2_alloc_context *data_ac,
6935                                u64 blkno, u32 cpos, u32 len)
6936{
6937        int ret, first_inserted = 0;
6938        u32 p_cluster, num_clusters, reflink_cpos = 0;
6939        u64 new_blkno;
6940        unsigned int num_buckets, reflink_buckets;
6941        unsigned int bpc =
6942                ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6943
6944        ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6945        if (ret) {
6946                mlog_errno(ret);
6947                goto out;
6948        }
6949        num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6950        ocfs2_xattr_bucket_relse(args->old_bucket);
6951
6952        while (len && num_buckets) {
6953                ret = ocfs2_claim_clusters(handle, data_ac,
6954                                           1, &p_cluster, &num_clusters);
6955                if (ret) {
6956                        mlog_errno(ret);
6957                        goto out;
6958                }
6959
6960                new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6961                reflink_buckets = min(num_buckets, bpc * num_clusters);
6962
6963                ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6964                                                 new_blkno, num_clusters,
6965                                                 &reflink_cpos, reflink_buckets,
6966                                                 meta_ac, data_ac, args);
6967                if (ret) {
6968                        mlog_errno(ret);
6969                        goto out;
6970                }
6971
6972                /*
6973                 * For the 1st allocated cluster, we make it use the same cpos
6974                 * so that the xattr tree looks the same as the original one
6975                 * in the most case.
6976                 */
6977                if (!first_inserted) {
6978                        reflink_cpos = cpos;
6979                        first_inserted = 1;
6980                }
6981                ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6982                                          num_clusters, 0, meta_ac);
6983                if (ret)
6984                        mlog_errno(ret);
6985
6986                trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
6987                                                  num_clusters, reflink_cpos);
6988
6989                len -= num_clusters;
6990                blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6991                num_buckets -= reflink_buckets;
6992        }
6993out:
6994        return ret;
6995}
6996
6997/*
6998 * Create the same xattr extent record in the new inode's xattr tree.
6999 */
7000static int ocfs2_reflink_xattr_rec(struct inode *inode,
7001                                   struct buffer_head *root_bh,
7002                                   u64 blkno,
7003                                   u32 cpos,
7004                                   u32 len,
7005                                   void *para)
7006{
7007        int ret, credits = 0;
7008        handle_t *handle;
7009        struct ocfs2_reflink_xattr_tree_args *args =
7010                        (struct ocfs2_reflink_xattr_tree_args *)para;
7011        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7012        struct ocfs2_alloc_context *meta_ac = NULL;
7013        struct ocfs2_alloc_context *data_ac = NULL;
7014        struct ocfs2_extent_tree et;
7015
7016        trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
7017
7018        ocfs2_init_xattr_tree_extent_tree(&et,
7019                                          INODE_CACHE(args->reflink->new_inode),
7020                                          args->new_blk_bh);
7021
7022        ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
7023                                                      len, &credits,
7024                                                      &meta_ac, &data_ac);
7025        if (ret) {
7026                mlog_errno(ret);
7027                goto out;
7028        }
7029
7030        handle = ocfs2_start_trans(osb, credits);
7031        if (IS_ERR(handle)) {
7032                ret = PTR_ERR(handle);
7033                mlog_errno(ret);
7034                goto out;
7035        }
7036
7037        ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
7038                                          meta_ac, data_ac,
7039                                          blkno, cpos, len);
7040        if (ret)
7041                mlog_errno(ret);
7042
7043        ocfs2_commit_trans(osb, handle);
7044
7045out:
7046        if (meta_ac)
7047                ocfs2_free_alloc_context(meta_ac);
7048        if (data_ac)
7049                ocfs2_free_alloc_context(data_ac);
7050        return ret;
7051}
7052
7053/*
7054 * Create reflinked xattr buckets.
7055 * We will add bucket one by one, and refcount all the xattrs in the bucket
7056 * if they are stored outside.
7057 */
7058static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7059                                    struct buffer_head *blk_bh,
7060                                    struct buffer_head *new_blk_bh)
7061{
7062        int ret;
7063        struct ocfs2_reflink_xattr_tree_args para;
7064
7065        memset(&para, 0, sizeof(para));
7066        para.reflink = args;
7067        para.old_blk_bh = blk_bh;
7068        para.new_blk_bh = new_blk_bh;
7069
7070        para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7071        if (!para.old_bucket) {
7072                mlog_errno(-ENOMEM);
7073                return -ENOMEM;
7074        }
7075
7076        para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7077        if (!para.new_bucket) {
7078                ret = -ENOMEM;
7079                mlog_errno(ret);
7080                goto out;
7081        }
7082
7083        ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7084                                              ocfs2_reflink_xattr_rec,
7085                                              &para);
7086        if (ret)
7087                mlog_errno(ret);
7088
7089out:
7090        ocfs2_xattr_bucket_free(para.old_bucket);
7091        ocfs2_xattr_bucket_free(para.new_bucket);
7092        return ret;
7093}
7094
7095static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7096                                        struct buffer_head *blk_bh)
7097{
7098        int ret, indexed = 0;
7099        struct buffer_head *new_blk_bh = NULL;
7100        struct ocfs2_xattr_block *xb =
7101                        (struct ocfs2_xattr_block *)blk_bh->b_data;
7102
7103
7104        if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7105                indexed = 1;
7106
7107        ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7108                                             &new_blk_bh, indexed);
7109        if (ret) {
7110                mlog_errno(ret);
7111                goto out;
7112        }
7113
7114        if (!indexed)
7115                ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7116        else
7117                ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7118        if (ret)
7119                mlog_errno(ret);
7120
7121out:
7122        brelse(new_blk_bh);
7123        return ret;
7124}
7125
7126static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7127{
7128        int type = ocfs2_xattr_get_type(xe);
7129
7130        return type != OCFS2_XATTR_INDEX_SECURITY &&
7131               type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7132               type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7133}
7134
7135int ocfs2_reflink_xattrs(struct inode *old_inode,
7136                         struct buffer_head *old_bh,
7137                         struct inode *new_inode,
7138                         struct buffer_head *new_bh,
7139                         bool preserve_security)
7140{
7141        int ret;
7142        struct ocfs2_xattr_reflink args;
7143        struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7144        struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7145        struct buffer_head *blk_bh = NULL;
7146        struct ocfs2_cached_dealloc_ctxt dealloc;
7147        struct ocfs2_refcount_tree *ref_tree;
7148        struct buffer_head *ref_root_bh = NULL;
7149
7150        ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7151                                       le64_to_cpu(di->i_refcount_loc),
7152                                       1, &ref_tree, &ref_root_bh);
7153        if (ret) {
7154                mlog_errno(ret);
7155                goto out;
7156        }
7157
7158        ocfs2_init_dealloc_ctxt(&dealloc);
7159
7160        args.old_inode = old_inode;
7161        args.new_inode = new_inode;
7162        args.old_bh = old_bh;
7163        args.new_bh = new_bh;
7164        args.ref_ci = &ref_tree->rf_ci;
7165        args.ref_root_bh = ref_root_bh;
7166        args.dealloc = &dealloc;
7167        if (preserve_security)
7168                args.xattr_reflinked = NULL;
7169        else
7170                args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7171
7172        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7173                ret = ocfs2_reflink_xattr_inline(&args);
7174                if (ret) {
7175                        mlog_errno(ret);
7176                        goto out_unlock;
7177                }
7178        }
7179
7180        if (!di->i_xattr_loc)
7181                goto out_unlock;
7182
7183        ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7184                                     &blk_bh);
7185        if (ret < 0) {
7186                mlog_errno(ret);
7187                goto out_unlock;
7188        }
7189
7190        ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7191        if (ret)
7192                mlog_errno(ret);
7193
7194        brelse(blk_bh);
7195
7196out_unlock:
7197        ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7198                                   ref_tree, 1);
7199        brelse(ref_root_bh);
7200
7201        if (ocfs2_dealloc_has_cluster(&dealloc)) {
7202                ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7203                ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7204        }
7205
7206out:
7207        return ret;
7208}
7209
7210/*
7211 * Initialize security and acl for a already created inode.
7212 * Used for reflink a non-preserve-security file.
7213 *
7214 * It uses common api like ocfs2_xattr_set, so the caller
7215 * must not hold any lock expect i_mutex.
7216 */
7217int ocfs2_init_security_and_acl(struct inode *dir,
7218                                struct inode *inode,
7219                                const struct qstr *qstr)
7220{
7221        int ret = 0;
7222        struct buffer_head *dir_bh = NULL;
7223
7224        ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7225        if (ret) {
7226                mlog_errno(ret);
7227                goto leave;
7228        }
7229
7230        ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7231        if (ret) {
7232                mlog_errno(ret);
7233                goto leave;
7234        }
7235        ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7236        if (ret)
7237                mlog_errno(ret);
7238
7239        ocfs2_inode_unlock(dir, 0);
7240        brelse(dir_bh);
7241leave:
7242        return ret;
7243}
7244
7245/*
7246 * 'security' attributes support
7247 */
7248static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
7249                                    struct dentry *dentry, const char *name,
7250                                    void *buffer, size_t size)
7251{
7252        return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
7253                               name, buffer, size);
7254}
7255
7256static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
7257                                    struct dentry *dentry, const char *name,
7258                                    const void *value, size_t size, int flags)
7259{
7260        return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
7261                               name, value, size, flags);
7262}
7263
7264static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
7265                     void *fs_info)
7266{
7267        const struct xattr *xattr;
7268        int err = 0;
7269
7270        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
7271                err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7272                                      xattr->name, xattr->value,
7273                                      xattr->value_len, XATTR_CREATE);
7274                if (err)
7275                        break;
7276        }
7277        return err;
7278}
7279
7280int ocfs2_init_security_get(struct inode *inode,
7281                            struct inode *dir,
7282                            const struct qstr *qstr,
7283                            struct ocfs2_security_xattr_info *si)
7284{
7285        /* check whether ocfs2 support feature xattr */
7286        if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7287                return -EOPNOTSUPP;
7288        if (si)
7289                return security_old_inode_init_security(inode, dir, qstr,
7290                                                        &si->name, &si->value,
7291                                                        &si->value_len);
7292
7293        return security_inode_init_security(inode, dir, qstr,
7294                                            &ocfs2_initxattrs, NULL);
7295}
7296
7297int ocfs2_init_security_set(handle_t *handle,
7298                            struct inode *inode,
7299                            struct buffer_head *di_bh,
7300                            struct ocfs2_security_xattr_info *si,
7301                            struct ocfs2_alloc_context *xattr_ac,
7302                            struct ocfs2_alloc_context *data_ac)
7303{
7304        return ocfs2_xattr_set_handle(handle, inode, di_bh,
7305                                     OCFS2_XATTR_INDEX_SECURITY,
7306                                     si->name, si->value, si->value_len, 0,
7307                                     xattr_ac, data_ac);
7308}
7309
7310const struct xattr_handler ocfs2_xattr_security_handler = {
7311        .prefix = XATTR_SECURITY_PREFIX,
7312        .get    = ocfs2_xattr_security_get,
7313        .set    = ocfs2_xattr_security_set,
7314};
7315
7316/*
7317 * 'trusted' attributes support
7318 */
7319static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
7320                                   struct dentry *dentry, const char *name,
7321                                   void *buffer, size_t size)
7322{
7323        return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
7324                               name, buffer, size);
7325}
7326
7327static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
7328                                   struct dentry *dentry, const char *name,
7329                                   const void *value, size_t size, int flags)
7330{
7331        return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
7332                               name, value, size, flags);
7333}
7334
7335const struct xattr_handler ocfs2_xattr_trusted_handler = {
7336        .prefix = XATTR_TRUSTED_PREFIX,
7337        .get    = ocfs2_xattr_trusted_get,
7338        .set    = ocfs2_xattr_trusted_set,
7339};
7340
7341/*
7342 * 'user' attributes support
7343 */
7344static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
7345                                struct dentry *dentry, const char *name,
7346                                void *buffer, size_t size)
7347{
7348        struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7349
7350        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7351                return -EOPNOTSUPP;
7352        return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name,
7353                               buffer, size);
7354}
7355
7356static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
7357                                struct dentry *dentry, const char *name,
7358                                const void *value, size_t size, int flags)
7359{
7360        struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7361
7362        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7363                return -EOPNOTSUPP;
7364
7365        return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER,
7366                               name, value, size, flags);
7367}
7368
7369const struct xattr_handler ocfs2_xattr_user_handler = {
7370        .prefix = XATTR_USER_PREFIX,
7371        .get    = ocfs2_xattr_user_get,
7372        .set    = ocfs2_xattr_user_set,
7373};
7374