linux/fs/ocfs2/xattr.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * xattr.c
   5 *
   6 * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
   7 *
   8 * CREDITS:
   9 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
  10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
  11 *
  12 * This program is free software; you can redistribute it and/or
  13 * modify it under the terms of the GNU General Public
  14 * License version 2 as published by the Free Software Foundation.
  15 *
  16 * This program is distributed in the hope that it will be useful,
  17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19 * General Public License for more details.
  20 */
  21
  22#include <linux/capability.h>
  23#include <linux/fs.h>
  24#include <linux/types.h>
  25#include <linux/slab.h>
  26#include <linux/highmem.h>
  27#include <linux/pagemap.h>
  28#include <linux/uio.h>
  29#include <linux/sched.h>
  30#include <linux/splice.h>
  31#include <linux/mount.h>
  32#include <linux/writeback.h>
  33#include <linux/falloc.h>
  34#include <linux/sort.h>
  35#include <linux/init.h>
  36#include <linux/module.h>
  37#include <linux/string.h>
  38#include <linux/security.h>
  39
  40#define MLOG_MASK_PREFIX ML_XATTR
  41#include <cluster/masklog.h>
  42
  43#include "ocfs2.h"
  44#include "alloc.h"
  45#include "blockcheck.h"
  46#include "dlmglue.h"
  47#include "file.h"
  48#include "symlink.h"
  49#include "sysfile.h"
  50#include "inode.h"
  51#include "journal.h"
  52#include "ocfs2_fs.h"
  53#include "suballoc.h"
  54#include "uptodate.h"
  55#include "buffer_head_io.h"
  56#include "super.h"
  57#include "xattr.h"
  58#include "refcounttree.h"
  59#include "acl.h"
  60
  61struct ocfs2_xattr_def_value_root {
  62        struct ocfs2_xattr_value_root   xv;
  63        struct ocfs2_extent_rec         er;
  64};
  65
  66struct ocfs2_xattr_bucket {
  67        /* The inode these xattrs are associated with */
  68        struct inode *bu_inode;
  69
  70        /* The actual buffers that make up the bucket */
  71        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
  72
  73        /* How many blocks make up one bucket for this filesystem */
  74        int bu_blocks;
  75};
  76
  77struct ocfs2_xattr_set_ctxt {
  78        handle_t *handle;
  79        struct ocfs2_alloc_context *meta_ac;
  80        struct ocfs2_alloc_context *data_ac;
  81        struct ocfs2_cached_dealloc_ctxt dealloc;
  82};
  83
  84#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
  85#define OCFS2_XATTR_INLINE_SIZE 80
  86#define OCFS2_XATTR_HEADER_GAP  4
  87#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
  88                                         - sizeof(struct ocfs2_xattr_header) \
  89                                         - OCFS2_XATTR_HEADER_GAP)
  90#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
  91                                         - sizeof(struct ocfs2_xattr_block) \
  92                                         - sizeof(struct ocfs2_xattr_header) \
  93                                         - OCFS2_XATTR_HEADER_GAP)
  94
  95static struct ocfs2_xattr_def_value_root def_xv = {
  96        .xv.xr_list.l_count = cpu_to_le16(1),
  97};
  98
  99struct xattr_handler *ocfs2_xattr_handlers[] = {
 100        &ocfs2_xattr_user_handler,
 101#ifdef CONFIG_OCFS2_FS_POSIX_ACL
 102        &ocfs2_xattr_acl_access_handler,
 103        &ocfs2_xattr_acl_default_handler,
 104#endif
 105        &ocfs2_xattr_trusted_handler,
 106        &ocfs2_xattr_security_handler,
 107        NULL
 108};
 109
 110static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
 111        [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
 112#ifdef CONFIG_OCFS2_FS_POSIX_ACL
 113        [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
 114                                        = &ocfs2_xattr_acl_access_handler,
 115        [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
 116                                        = &ocfs2_xattr_acl_default_handler,
 117#endif
 118        [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
 119        [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
 120};
 121
 122struct ocfs2_xattr_info {
 123        int name_index;
 124        const char *name;
 125        const void *value;
 126        size_t value_len;
 127};
 128
 129struct ocfs2_xattr_search {
 130        struct buffer_head *inode_bh;
 131        /*
 132         * xattr_bh point to the block buffer head which has extended attribute
 133         * when extended attribute in inode, xattr_bh is equal to inode_bh.
 134         */
 135        struct buffer_head *xattr_bh;
 136        struct ocfs2_xattr_header *header;
 137        struct ocfs2_xattr_bucket *bucket;
 138        void *base;
 139        void *end;
 140        struct ocfs2_xattr_entry *here;
 141        int not_found;
 142};
 143
 144static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
 145                                             struct ocfs2_xattr_header *xh,
 146                                             int index,
 147                                             int *block_off,
 148                                             int *new_offset);
 149
 150static int ocfs2_xattr_block_find(struct inode *inode,
 151                                  int name_index,
 152                                  const char *name,
 153                                  struct ocfs2_xattr_search *xs);
 154static int ocfs2_xattr_index_block_find(struct inode *inode,
 155                                        struct buffer_head *root_bh,
 156                                        int name_index,
 157                                        const char *name,
 158                                        struct ocfs2_xattr_search *xs);
 159
 160static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
 161                                        struct buffer_head *blk_bh,
 162                                        char *buffer,
 163                                        size_t buffer_size);
 164
 165static int ocfs2_xattr_create_index_block(struct inode *inode,
 166                                          struct ocfs2_xattr_search *xs,
 167                                          struct ocfs2_xattr_set_ctxt *ctxt);
 168
 169static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
 170                                             struct ocfs2_xattr_info *xi,
 171                                             struct ocfs2_xattr_search *xs,
 172                                             struct ocfs2_xattr_set_ctxt *ctxt);
 173
 174typedef int (xattr_tree_rec_func)(struct inode *inode,
 175                                  struct buffer_head *root_bh,
 176                                  u64 blkno, u32 cpos, u32 len, void *para);
 177static int ocfs2_iterate_xattr_index_block(struct inode *inode,
 178                                           struct buffer_head *root_bh,
 179                                           xattr_tree_rec_func *rec_func,
 180                                           void *para);
 181static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 182                                        struct ocfs2_xattr_bucket *bucket,
 183                                        void *para);
 184static int ocfs2_rm_xattr_cluster(struct inode *inode,
 185                                  struct buffer_head *root_bh,
 186                                  u64 blkno,
 187                                  u32 cpos,
 188                                  u32 len,
 189                                  void *para);
 190
 191static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
 192                                  u64 src_blk, u64 last_blk, u64 to_blk,
 193                                  unsigned int start_bucket,
 194                                  u32 *first_hash);
 195static int ocfs2_prepare_refcount_xattr(struct inode *inode,
 196                                        struct ocfs2_dinode *di,
 197                                        struct ocfs2_xattr_info *xi,
 198                                        struct ocfs2_xattr_search *xis,
 199                                        struct ocfs2_xattr_search *xbs,
 200                                        struct ocfs2_refcount_tree **ref_tree,
 201                                        int *meta_need,
 202                                        int *credits);
 203static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
 204                                           struct ocfs2_xattr_bucket *bucket,
 205                                           int offset,
 206                                           struct ocfs2_xattr_value_root **xv,
 207                                           struct buffer_head **bh);
 208static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
 209                                    const void *value, size_t size, int flags);
 210
 211static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 212{
 213        return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
 214}
 215
 216static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
 217{
 218        return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
 219}
 220
 221static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
 222{
 223        u16 len = sb->s_blocksize -
 224                 offsetof(struct ocfs2_xattr_header, xh_entries);
 225
 226        return len / sizeof(struct ocfs2_xattr_entry);
 227}
 228
 229#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
 230#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
 231#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
 232
 233static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
 234{
 235        struct ocfs2_xattr_bucket *bucket;
 236        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 237
 238        BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
 239
 240        bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
 241        if (bucket) {
 242                bucket->bu_inode = inode;
 243                bucket->bu_blocks = blks;
 244        }
 245
 246        return bucket;
 247}
 248
 249static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
 250{
 251        int i;
 252
 253        for (i = 0; i < bucket->bu_blocks; i++) {
 254                brelse(bucket->bu_bhs[i]);
 255                bucket->bu_bhs[i] = NULL;
 256        }
 257}
 258
 259static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
 260{
 261        if (bucket) {
 262                ocfs2_xattr_bucket_relse(bucket);
 263                bucket->bu_inode = NULL;
 264                kfree(bucket);
 265        }
 266}
 267
 268/*
 269 * A bucket that has never been written to disk doesn't need to be
 270 * read.  We just need the buffer_heads.  Don't call this for
 271 * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
 272 * them fully.
 273 */
 274static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 275                                   u64 xb_blkno)
 276{
 277        int i, rc = 0;
 278
 279        for (i = 0; i < bucket->bu_blocks; i++) {
 280                bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
 281                                              xb_blkno + i);
 282                if (!bucket->bu_bhs[i]) {
 283                        rc = -EIO;
 284                        mlog_errno(rc);
 285                        break;
 286                }
 287
 288                if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 289                                           bucket->bu_bhs[i]))
 290                        ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
 291                                                      bucket->bu_bhs[i]);
 292        }
 293
 294        if (rc)
 295                ocfs2_xattr_bucket_relse(bucket);
 296        return rc;
 297}
 298
 299/* Read the xattr bucket at xb_blkno */
 300static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 301                                   u64 xb_blkno)
 302{
 303        int rc;
 304
 305        rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
 306                               bucket->bu_blocks, bucket->bu_bhs, 0,
 307                               NULL);
 308        if (!rc) {
 309                spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 310                rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
 311                                                 bucket->bu_bhs,
 312                                                 bucket->bu_blocks,
 313                                                 &bucket_xh(bucket)->xh_check);
 314                spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 315                if (rc)
 316                        mlog_errno(rc);
 317        }
 318
 319        if (rc)
 320                ocfs2_xattr_bucket_relse(bucket);
 321        return rc;
 322}
 323
 324static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
 325                                             struct ocfs2_xattr_bucket *bucket,
 326                                             int type)
 327{
 328        int i, rc = 0;
 329
 330        for (i = 0; i < bucket->bu_blocks; i++) {
 331                rc = ocfs2_journal_access(handle,
 332                                          INODE_CACHE(bucket->bu_inode),
 333                                          bucket->bu_bhs[i], type);
 334                if (rc) {
 335                        mlog_errno(rc);
 336                        break;
 337                }
 338        }
 339
 340        return rc;
 341}
 342
 343static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 344                                             struct ocfs2_xattr_bucket *bucket)
 345{
 346        int i;
 347
 348        spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 349        ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
 350                                   bucket->bu_bhs, bucket->bu_blocks,
 351                                   &bucket_xh(bucket)->xh_check);
 352        spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 353
 354        for (i = 0; i < bucket->bu_blocks; i++)
 355                ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
 356}
 357
 358static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
 359                                         struct ocfs2_xattr_bucket *src)
 360{
 361        int i;
 362        int blocksize = src->bu_inode->i_sb->s_blocksize;
 363
 364        BUG_ON(dest->bu_blocks != src->bu_blocks);
 365        BUG_ON(dest->bu_inode != src->bu_inode);
 366
 367        for (i = 0; i < src->bu_blocks; i++) {
 368                memcpy(bucket_block(dest, i), bucket_block(src, i),
 369                       blocksize);
 370        }
 371}
 372
 373static int ocfs2_validate_xattr_block(struct super_block *sb,
 374                                      struct buffer_head *bh)
 375{
 376        int rc;
 377        struct ocfs2_xattr_block *xb =
 378                (struct ocfs2_xattr_block *)bh->b_data;
 379
 380        mlog(0, "Validating xattr block %llu\n",
 381             (unsigned long long)bh->b_blocknr);
 382
 383        BUG_ON(!buffer_uptodate(bh));
 384
 385        /*
 386         * If the ecc fails, we return the error but otherwise
 387         * leave the filesystem running.  We know any error is
 388         * local to this block.
 389         */
 390        rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
 391        if (rc)
 392                return rc;
 393
 394        /*
 395         * Errors after here are fatal
 396         */
 397
 398        if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
 399                ocfs2_error(sb,
 400                            "Extended attribute block #%llu has bad "
 401                            "signature %.*s",
 402                            (unsigned long long)bh->b_blocknr, 7,
 403                            xb->xb_signature);
 404                return -EINVAL;
 405        }
 406
 407        if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
 408                ocfs2_error(sb,
 409                            "Extended attribute block #%llu has an "
 410                            "invalid xb_blkno of %llu",
 411                            (unsigned long long)bh->b_blocknr,
 412                            (unsigned long long)le64_to_cpu(xb->xb_blkno));
 413                return -EINVAL;
 414        }
 415
 416        if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
 417                ocfs2_error(sb,
 418                            "Extended attribute block #%llu has an invalid "
 419                            "xb_fs_generation of #%u",
 420                            (unsigned long long)bh->b_blocknr,
 421                            le32_to_cpu(xb->xb_fs_generation));
 422                return -EINVAL;
 423        }
 424
 425        return 0;
 426}
 427
 428static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
 429                                  struct buffer_head **bh)
 430{
 431        int rc;
 432        struct buffer_head *tmp = *bh;
 433
 434        rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
 435                              ocfs2_validate_xattr_block);
 436
 437        /* If ocfs2_read_block() got us a new bh, pass it up. */
 438        if (!rc && !*bh)
 439                *bh = tmp;
 440
 441        return rc;
 442}
 443
 444static inline const char *ocfs2_xattr_prefix(int name_index)
 445{
 446        struct xattr_handler *handler = NULL;
 447
 448        if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
 449                handler = ocfs2_xattr_handler_map[name_index];
 450
 451        return handler ? handler->prefix : NULL;
 452}
 453
 454static u32 ocfs2_xattr_name_hash(struct inode *inode,
 455                                 const char *name,
 456                                 int name_len)
 457{
 458        /* Get hash value of uuid from super block */
 459        u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
 460        int i;
 461
 462        /* hash extended attribute name */
 463        for (i = 0; i < name_len; i++) {
 464                hash = (hash << OCFS2_HASH_SHIFT) ^
 465                       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
 466                       *name++;
 467        }
 468
 469        return hash;
 470}
 471
 472/*
 473 * ocfs2_xattr_hash_entry()
 474 *
 475 * Compute the hash of an extended attribute.
 476 */
 477static void ocfs2_xattr_hash_entry(struct inode *inode,
 478                                   struct ocfs2_xattr_header *header,
 479                                   struct ocfs2_xattr_entry *entry)
 480{
 481        u32 hash = 0;
 482        char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
 483
 484        hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
 485        entry->xe_name_hash = cpu_to_le32(hash);
 486
 487        return;
 488}
 489
 490static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
 491{
 492        int size = 0;
 493
 494        if (value_len <= OCFS2_XATTR_INLINE_SIZE)
 495                size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
 496        else
 497                size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
 498        size += sizeof(struct ocfs2_xattr_entry);
 499
 500        return size;
 501}
 502
 503int ocfs2_calc_security_init(struct inode *dir,
 504                             struct ocfs2_security_xattr_info *si,
 505                             int *want_clusters,
 506                             int *xattr_credits,
 507                             struct ocfs2_alloc_context **xattr_ac)
 508{
 509        int ret = 0;
 510        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 511        int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 512                                                 si->value_len);
 513
 514        /*
 515         * The max space of security xattr taken inline is
 516         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 517         * So reserve one metadata block for it is ok.
 518         */
 519        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 520            s_size > OCFS2_XATTR_FREE_IN_IBODY) {
 521                ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
 522                if (ret) {
 523                        mlog_errno(ret);
 524                        return ret;
 525                }
 526                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 527        }
 528
 529        /* reserve clusters for xattr value which will be set in B tree*/
 530        if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 531                int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 532                                                            si->value_len);
 533
 534                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 535                                                           new_clusters);
 536                *want_clusters += new_clusters;
 537        }
 538        return ret;
 539}
 540
 541int ocfs2_calc_xattr_init(struct inode *dir,
 542                          struct buffer_head *dir_bh,
 543                          int mode,
 544                          struct ocfs2_security_xattr_info *si,
 545                          int *want_clusters,
 546                          int *xattr_credits,
 547                          int *want_meta)
 548{
 549        int ret = 0;
 550        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 551        int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
 552
 553        if (si->enable)
 554                s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
 555                                                     si->value_len);
 556
 557        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
 558                acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
 559                                        OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
 560                                        "", NULL, 0);
 561                if (acl_len > 0) {
 562                        a_size = ocfs2_xattr_entry_real_size(0, acl_len);
 563                        if (S_ISDIR(mode))
 564                                a_size <<= 1;
 565                } else if (acl_len != 0 && acl_len != -ENODATA) {
 566                        mlog_errno(ret);
 567                        return ret;
 568                }
 569        }
 570
 571        if (!(s_size + a_size))
 572                return ret;
 573
 574        /*
 575         * The max space of security xattr taken inline is
 576         * 256(name) + 80(value) + 16(entry) = 352 bytes,
 577         * The max space of acl xattr taken inline is
 578         * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
 579         * when blocksize = 512, may reserve one more cluser for
 580         * xattr bucket, otherwise reserve one metadata block
 581         * for them is ok.
 582         * If this is a new directory with inline data,
 583         * we choose to reserve the entire inline area for
 584         * directory contents and force an external xattr block.
 585         */
 586        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
 587            (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 588            (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
 589                *want_meta = *want_meta + 1;
 590                *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
 591        }
 592
 593        if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
 594            (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
 595                *want_clusters += 1;
 596                *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
 597        }
 598
 599        /*
 600         * reserve credits and clusters for xattrs which has large value
 601         * and have to be set outside
 602         */
 603        if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
 604                new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
 605                                                        si->value_len);
 606                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 607                                                           new_clusters);
 608                *want_clusters += new_clusters;
 609        }
 610        if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
 611            acl_len > OCFS2_XATTR_INLINE_SIZE) {
 612                /* for directory, it has DEFAULT and ACCESS two types of acls */
 613                new_clusters = (S_ISDIR(mode) ? 2 : 1) *
 614                                ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
 615                *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
 616                                                           new_clusters);
 617                *want_clusters += new_clusters;
 618        }
 619
 620        return ret;
 621}
 622
 623static int ocfs2_xattr_extend_allocation(struct inode *inode,
 624                                         u32 clusters_to_add,
 625                                         struct ocfs2_xattr_value_buf *vb,
 626                                         struct ocfs2_xattr_set_ctxt *ctxt)
 627{
 628        int status = 0;
 629        handle_t *handle = ctxt->handle;
 630        enum ocfs2_alloc_restarted why;
 631        u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
 632        struct ocfs2_extent_tree et;
 633
 634        mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
 635
 636        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 637
 638        status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 639                              OCFS2_JOURNAL_ACCESS_WRITE);
 640        if (status < 0) {
 641                mlog_errno(status);
 642                goto leave;
 643        }
 644
 645        prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 646        status = ocfs2_add_clusters_in_btree(handle,
 647                                             &et,
 648                                             &logical_start,
 649                                             clusters_to_add,
 650                                             0,
 651                                             ctxt->data_ac,
 652                                             ctxt->meta_ac,
 653                                             &why);
 654        if (status < 0) {
 655                mlog_errno(status);
 656                goto leave;
 657        }
 658
 659        status = ocfs2_journal_dirty(handle, vb->vb_bh);
 660        if (status < 0) {
 661                mlog_errno(status);
 662                goto leave;
 663        }
 664
 665        clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
 666
 667        /*
 668         * We should have already allocated enough space before the transaction,
 669         * so no need to restart.
 670         */
 671        BUG_ON(why != RESTART_NONE || clusters_to_add);
 672
 673leave:
 674
 675        return status;
 676}
 677
 678static int __ocfs2_remove_xattr_range(struct inode *inode,
 679                                      struct ocfs2_xattr_value_buf *vb,
 680                                      u32 cpos, u32 phys_cpos, u32 len,
 681                                      unsigned int ext_flags,
 682                                      struct ocfs2_xattr_set_ctxt *ctxt)
 683{
 684        int ret;
 685        u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 686        handle_t *handle = ctxt->handle;
 687        struct ocfs2_extent_tree et;
 688
 689        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 690
 691        ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
 692                            OCFS2_JOURNAL_ACCESS_WRITE);
 693        if (ret) {
 694                mlog_errno(ret);
 695                goto out;
 696        }
 697
 698        ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
 699                                  &ctxt->dealloc);
 700        if (ret) {
 701                mlog_errno(ret);
 702                goto out;
 703        }
 704
 705        le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
 706
 707        ret = ocfs2_journal_dirty(handle, vb->vb_bh);
 708        if (ret) {
 709                mlog_errno(ret);
 710                goto out;
 711        }
 712
 713        if (ext_flags & OCFS2_EXT_REFCOUNTED)
 714                ret = ocfs2_decrease_refcount(inode, handle,
 715                                        ocfs2_blocks_to_clusters(inode->i_sb,
 716                                                                 phys_blkno),
 717                                        len, ctxt->meta_ac, &ctxt->dealloc, 1);
 718        else
 719                ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
 720                                                  phys_blkno, len);
 721        if (ret)
 722                mlog_errno(ret);
 723
 724out:
 725        return ret;
 726}
 727
 728static int ocfs2_xattr_shrink_size(struct inode *inode,
 729                                   u32 old_clusters,
 730                                   u32 new_clusters,
 731                                   struct ocfs2_xattr_value_buf *vb,
 732                                   struct ocfs2_xattr_set_ctxt *ctxt)
 733{
 734        int ret = 0;
 735        unsigned int ext_flags;
 736        u32 trunc_len, cpos, phys_cpos, alloc_size;
 737        u64 block;
 738
 739        if (old_clusters <= new_clusters)
 740                return 0;
 741
 742        cpos = new_clusters;
 743        trunc_len = old_clusters - new_clusters;
 744        while (trunc_len) {
 745                ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
 746                                               &alloc_size,
 747                                               &vb->vb_xv->xr_list, &ext_flags);
 748                if (ret) {
 749                        mlog_errno(ret);
 750                        goto out;
 751                }
 752
 753                if (alloc_size > trunc_len)
 754                        alloc_size = trunc_len;
 755
 756                ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
 757                                                 phys_cpos, alloc_size,
 758                                                 ext_flags, ctxt);
 759                if (ret) {
 760                        mlog_errno(ret);
 761                        goto out;
 762                }
 763
 764                block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
 765                ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
 766                                                       block, alloc_size);
 767                cpos += alloc_size;
 768                trunc_len -= alloc_size;
 769        }
 770
 771out:
 772        return ret;
 773}
 774
 775static int ocfs2_xattr_value_truncate(struct inode *inode,
 776                                      struct ocfs2_xattr_value_buf *vb,
 777                                      int len,
 778                                      struct ocfs2_xattr_set_ctxt *ctxt)
 779{
 780        int ret;
 781        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
 782        u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
 783
 784        if (new_clusters == old_clusters)
 785                return 0;
 786
 787        if (new_clusters > old_clusters)
 788                ret = ocfs2_xattr_extend_allocation(inode,
 789                                                    new_clusters - old_clusters,
 790                                                    vb, ctxt);
 791        else
 792                ret = ocfs2_xattr_shrink_size(inode,
 793                                              old_clusters, new_clusters,
 794                                              vb, ctxt);
 795
 796        return ret;
 797}
 798
 799static int ocfs2_xattr_list_entry(char *buffer, size_t size,
 800                                  size_t *result, const char *prefix,
 801                                  const char *name, int name_len)
 802{
 803        char *p = buffer + *result;
 804        int prefix_len = strlen(prefix);
 805        int total_len = prefix_len + name_len + 1;
 806
 807        *result += total_len;
 808
 809        /* we are just looking for how big our buffer needs to be */
 810        if (!size)
 811                return 0;
 812
 813        if (*result > size)
 814                return -ERANGE;
 815
 816        memcpy(p, prefix, prefix_len);
 817        memcpy(p + prefix_len, name, name_len);
 818        p[prefix_len + name_len] = '\0';
 819
 820        return 0;
 821}
 822
 823static int ocfs2_xattr_list_entries(struct inode *inode,
 824                                    struct ocfs2_xattr_header *header,
 825                                    char *buffer, size_t buffer_size)
 826{
 827        size_t result = 0;
 828        int i, type, ret;
 829        const char *prefix, *name;
 830
 831        for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
 832                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
 833                type = ocfs2_xattr_get_type(entry);
 834                prefix = ocfs2_xattr_prefix(type);
 835
 836                if (prefix) {
 837                        name = (const char *)header +
 838                                le16_to_cpu(entry->xe_name_offset);
 839
 840                        ret = ocfs2_xattr_list_entry(buffer, buffer_size,
 841                                                     &result, prefix, name,
 842                                                     entry->xe_name_len);
 843                        if (ret)
 844                                return ret;
 845                }
 846        }
 847
 848        return result;
 849}
 850
 851int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
 852                                         struct ocfs2_dinode *di)
 853{
 854        struct ocfs2_xattr_header *xh;
 855        int i;
 856
 857        xh = (struct ocfs2_xattr_header *)
 858                 ((void *)di + inode->i_sb->s_blocksize -
 859                 le16_to_cpu(di->i_xattr_inline_size));
 860
 861        for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
 862                if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
 863                        return 1;
 864
 865        return 0;
 866}
 867
 868static int ocfs2_xattr_ibody_list(struct inode *inode,
 869                                  struct ocfs2_dinode *di,
 870                                  char *buffer,
 871                                  size_t buffer_size)
 872{
 873        struct ocfs2_xattr_header *header = NULL;
 874        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 875        int ret = 0;
 876
 877        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
 878                return ret;
 879
 880        header = (struct ocfs2_xattr_header *)
 881                 ((void *)di + inode->i_sb->s_blocksize -
 882                 le16_to_cpu(di->i_xattr_inline_size));
 883
 884        ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
 885
 886        return ret;
 887}
 888
 889static int ocfs2_xattr_block_list(struct inode *inode,
 890                                  struct ocfs2_dinode *di,
 891                                  char *buffer,
 892                                  size_t buffer_size)
 893{
 894        struct buffer_head *blk_bh = NULL;
 895        struct ocfs2_xattr_block *xb;
 896        int ret = 0;
 897
 898        if (!di->i_xattr_loc)
 899                return ret;
 900
 901        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
 902                                     &blk_bh);
 903        if (ret < 0) {
 904                mlog_errno(ret);
 905                return ret;
 906        }
 907
 908        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
 909        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
 910                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
 911                ret = ocfs2_xattr_list_entries(inode, header,
 912                                               buffer, buffer_size);
 913        } else
 914                ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
 915                                                   buffer, buffer_size);
 916
 917        brelse(blk_bh);
 918
 919        return ret;
 920}
 921
 922ssize_t ocfs2_listxattr(struct dentry *dentry,
 923                        char *buffer,
 924                        size_t size)
 925{
 926        int ret = 0, i_ret = 0, b_ret = 0;
 927        struct buffer_head *di_bh = NULL;
 928        struct ocfs2_dinode *di = NULL;
 929        struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
 930
 931        if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
 932                return -EOPNOTSUPP;
 933
 934        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
 935                return ret;
 936
 937        ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
 938        if (ret < 0) {
 939                mlog_errno(ret);
 940                return ret;
 941        }
 942
 943        di = (struct ocfs2_dinode *)di_bh->b_data;
 944
 945        down_read(&oi->ip_xattr_sem);
 946        i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
 947        if (i_ret < 0)
 948                b_ret = 0;
 949        else {
 950                if (buffer) {
 951                        buffer += i_ret;
 952                        size -= i_ret;
 953                }
 954                b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
 955                                               buffer, size);
 956                if (b_ret < 0)
 957                        i_ret = 0;
 958        }
 959        up_read(&oi->ip_xattr_sem);
 960        ocfs2_inode_unlock(dentry->d_inode, 0);
 961
 962        brelse(di_bh);
 963
 964        return i_ret + b_ret;
 965}
 966
 967static int ocfs2_xattr_find_entry(int name_index,
 968                                  const char *name,
 969                                  struct ocfs2_xattr_search *xs)
 970{
 971        struct ocfs2_xattr_entry *entry;
 972        size_t name_len;
 973        int i, cmp = 1;
 974
 975        if (name == NULL)
 976                return -EINVAL;
 977
 978        name_len = strlen(name);
 979        entry = xs->here;
 980        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
 981                cmp = name_index - ocfs2_xattr_get_type(entry);
 982                if (!cmp)
 983                        cmp = name_len - entry->xe_name_len;
 984                if (!cmp)
 985                        cmp = memcmp(name, (xs->base +
 986                                     le16_to_cpu(entry->xe_name_offset)),
 987                                     name_len);
 988                if (cmp == 0)
 989                        break;
 990                entry += 1;
 991        }
 992        xs->here = entry;
 993
 994        return cmp ? -ENODATA : 0;
 995}
 996
 997static int ocfs2_xattr_get_value_outside(struct inode *inode,
 998                                         struct ocfs2_xattr_value_root *xv,
 999                                         void *buffer,
1000                                         size_t len)
1001{
1002        u32 cpos, p_cluster, num_clusters, bpc, clusters;
1003        u64 blkno;
1004        int i, ret = 0;
1005        size_t cplen, blocksize;
1006        struct buffer_head *bh = NULL;
1007        struct ocfs2_extent_list *el;
1008
1009        el = &xv->xr_list;
1010        clusters = le32_to_cpu(xv->xr_clusters);
1011        bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1012        blocksize = inode->i_sb->s_blocksize;
1013
1014        cpos = 0;
1015        while (cpos < clusters) {
1016                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1017                                               &num_clusters, el, NULL);
1018                if (ret) {
1019                        mlog_errno(ret);
1020                        goto out;
1021                }
1022
1023                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1024                /* Copy ocfs2_xattr_value */
1025                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1026                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1027                                               &bh, NULL);
1028                        if (ret) {
1029                                mlog_errno(ret);
1030                                goto out;
1031                        }
1032
1033                        cplen = len >= blocksize ? blocksize : len;
1034                        memcpy(buffer, bh->b_data, cplen);
1035                        len -= cplen;
1036                        buffer += cplen;
1037
1038                        brelse(bh);
1039                        bh = NULL;
1040                        if (len == 0)
1041                                break;
1042                }
1043                cpos += num_clusters;
1044        }
1045out:
1046        return ret;
1047}
1048
1049static int ocfs2_xattr_ibody_get(struct inode *inode,
1050                                 int name_index,
1051                                 const char *name,
1052                                 void *buffer,
1053                                 size_t buffer_size,
1054                                 struct ocfs2_xattr_search *xs)
1055{
1056        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1057        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1058        struct ocfs2_xattr_value_root *xv;
1059        size_t size;
1060        int ret = 0;
1061
1062        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1063                return -ENODATA;
1064
1065        xs->end = (void *)di + inode->i_sb->s_blocksize;
1066        xs->header = (struct ocfs2_xattr_header *)
1067                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1068        xs->base = (void *)xs->header;
1069        xs->here = xs->header->xh_entries;
1070
1071        ret = ocfs2_xattr_find_entry(name_index, name, xs);
1072        if (ret)
1073                return ret;
1074        size = le64_to_cpu(xs->here->xe_value_size);
1075        if (buffer) {
1076                if (size > buffer_size)
1077                        return -ERANGE;
1078                if (ocfs2_xattr_is_local(xs->here)) {
1079                        memcpy(buffer, (void *)xs->base +
1080                               le16_to_cpu(xs->here->xe_name_offset) +
1081                               OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1082                } else {
1083                        xv = (struct ocfs2_xattr_value_root *)
1084                                (xs->base + le16_to_cpu(
1085                                 xs->here->xe_name_offset) +
1086                                OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1087                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1088                                                            buffer, size);
1089                        if (ret < 0) {
1090                                mlog_errno(ret);
1091                                return ret;
1092                        }
1093                }
1094        }
1095
1096        return size;
1097}
1098
1099static int ocfs2_xattr_block_get(struct inode *inode,
1100                                 int name_index,
1101                                 const char *name,
1102                                 void *buffer,
1103                                 size_t buffer_size,
1104                                 struct ocfs2_xattr_search *xs)
1105{
1106        struct ocfs2_xattr_block *xb;
1107        struct ocfs2_xattr_value_root *xv;
1108        size_t size;
1109        int ret = -ENODATA, name_offset, name_len, i;
1110        int uninitialized_var(block_off);
1111
1112        xs->bucket = ocfs2_xattr_bucket_new(inode);
1113        if (!xs->bucket) {
1114                ret = -ENOMEM;
1115                mlog_errno(ret);
1116                goto cleanup;
1117        }
1118
1119        ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1120        if (ret) {
1121                mlog_errno(ret);
1122                goto cleanup;
1123        }
1124
1125        if (xs->not_found) {
1126                ret = -ENODATA;
1127                goto cleanup;
1128        }
1129
1130        xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1131        size = le64_to_cpu(xs->here->xe_value_size);
1132        if (buffer) {
1133                ret = -ERANGE;
1134                if (size > buffer_size)
1135                        goto cleanup;
1136
1137                name_offset = le16_to_cpu(xs->here->xe_name_offset);
1138                name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1139                i = xs->here - xs->header->xh_entries;
1140
1141                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1142                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1143                                                                bucket_xh(xs->bucket),
1144                                                                i,
1145                                                                &block_off,
1146                                                                &name_offset);
1147                        xs->base = bucket_block(xs->bucket, block_off);
1148                }
1149                if (ocfs2_xattr_is_local(xs->here)) {
1150                        memcpy(buffer, (void *)xs->base +
1151                               name_offset + name_len, size);
1152                } else {
1153                        xv = (struct ocfs2_xattr_value_root *)
1154                                (xs->base + name_offset + name_len);
1155                        ret = ocfs2_xattr_get_value_outside(inode, xv,
1156                                                            buffer, size);
1157                        if (ret < 0) {
1158                                mlog_errno(ret);
1159                                goto cleanup;
1160                        }
1161                }
1162        }
1163        ret = size;
1164cleanup:
1165        ocfs2_xattr_bucket_free(xs->bucket);
1166
1167        brelse(xs->xattr_bh);
1168        xs->xattr_bh = NULL;
1169        return ret;
1170}
1171
1172int ocfs2_xattr_get_nolock(struct inode *inode,
1173                           struct buffer_head *di_bh,
1174                           int name_index,
1175                           const char *name,
1176                           void *buffer,
1177                           size_t buffer_size)
1178{
1179        int ret;
1180        struct ocfs2_dinode *di = NULL;
1181        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1182        struct ocfs2_xattr_search xis = {
1183                .not_found = -ENODATA,
1184        };
1185        struct ocfs2_xattr_search xbs = {
1186                .not_found = -ENODATA,
1187        };
1188
1189        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1190                return -EOPNOTSUPP;
1191
1192        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1193                ret = -ENODATA;
1194
1195        xis.inode_bh = xbs.inode_bh = di_bh;
1196        di = (struct ocfs2_dinode *)di_bh->b_data;
1197
1198        down_read(&oi->ip_xattr_sem);
1199        ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1200                                    buffer_size, &xis);
1201        if (ret == -ENODATA && di->i_xattr_loc)
1202                ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1203                                            buffer_size, &xbs);
1204        up_read(&oi->ip_xattr_sem);
1205
1206        return ret;
1207}
1208
1209/* ocfs2_xattr_get()
1210 *
1211 * Copy an extended attribute into the buffer provided.
1212 * Buffer is NULL to compute the size of buffer required.
1213 */
1214static int ocfs2_xattr_get(struct inode *inode,
1215                           int name_index,
1216                           const char *name,
1217                           void *buffer,
1218                           size_t buffer_size)
1219{
1220        int ret;
1221        struct buffer_head *di_bh = NULL;
1222
1223        ret = ocfs2_inode_lock(inode, &di_bh, 0);
1224        if (ret < 0) {
1225                mlog_errno(ret);
1226                return ret;
1227        }
1228        ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1229                                     name, buffer, buffer_size);
1230
1231        ocfs2_inode_unlock(inode, 0);
1232
1233        brelse(di_bh);
1234
1235        return ret;
1236}
1237
1238static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1239                                           handle_t *handle,
1240                                           struct ocfs2_xattr_value_buf *vb,
1241                                           const void *value,
1242                                           int value_len)
1243{
1244        int ret = 0, i, cp_len;
1245        u16 blocksize = inode->i_sb->s_blocksize;
1246        u32 p_cluster, num_clusters;
1247        u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1248        u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1249        u64 blkno;
1250        struct buffer_head *bh = NULL;
1251        unsigned int ext_flags;
1252        struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1253
1254        BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1255
1256        while (cpos < clusters) {
1257                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1258                                               &num_clusters, &xv->xr_list,
1259                                               &ext_flags);
1260                if (ret) {
1261                        mlog_errno(ret);
1262                        goto out;
1263                }
1264
1265                BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1266
1267                blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1268
1269                for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1270                        ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1271                                               &bh, NULL);
1272                        if (ret) {
1273                                mlog_errno(ret);
1274                                goto out;
1275                        }
1276
1277                        ret = ocfs2_journal_access(handle,
1278                                                   INODE_CACHE(inode),
1279                                                   bh,
1280                                                   OCFS2_JOURNAL_ACCESS_WRITE);
1281                        if (ret < 0) {
1282                                mlog_errno(ret);
1283                                goto out;
1284                        }
1285
1286                        cp_len = value_len > blocksize ? blocksize : value_len;
1287                        memcpy(bh->b_data, value, cp_len);
1288                        value_len -= cp_len;
1289                        value += cp_len;
1290                        if (cp_len < blocksize)
1291                                memset(bh->b_data + cp_len, 0,
1292                                       blocksize - cp_len);
1293
1294                        ret = ocfs2_journal_dirty(handle, bh);
1295                        if (ret < 0) {
1296                                mlog_errno(ret);
1297                                goto out;
1298                        }
1299                        brelse(bh);
1300                        bh = NULL;
1301
1302                        /*
1303                         * XXX: do we need to empty all the following
1304                         * blocks in this cluster?
1305                         */
1306                        if (!value_len)
1307                                break;
1308                }
1309                cpos += num_clusters;
1310        }
1311out:
1312        brelse(bh);
1313
1314        return ret;
1315}
1316
1317static int ocfs2_xattr_cleanup(struct inode *inode,
1318                               handle_t *handle,
1319                               struct ocfs2_xattr_info *xi,
1320                               struct ocfs2_xattr_search *xs,
1321                               struct ocfs2_xattr_value_buf *vb,
1322                               size_t offs)
1323{
1324        int ret = 0;
1325        size_t name_len = strlen(xi->name);
1326        void *val = xs->base + offs;
1327        size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1328
1329        ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1330                            OCFS2_JOURNAL_ACCESS_WRITE);
1331        if (ret) {
1332                mlog_errno(ret);
1333                goto out;
1334        }
1335        /* Decrease xattr count */
1336        le16_add_cpu(&xs->header->xh_count, -1);
1337        /* Remove the xattr entry and tree root which has already be set*/
1338        memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1339        memset(val, 0, size);
1340
1341        ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1342        if (ret < 0)
1343                mlog_errno(ret);
1344out:
1345        return ret;
1346}
1347
1348static int ocfs2_xattr_update_entry(struct inode *inode,
1349                                    handle_t *handle,
1350                                    struct ocfs2_xattr_info *xi,
1351                                    struct ocfs2_xattr_search *xs,
1352                                    struct ocfs2_xattr_value_buf *vb,
1353                                    size_t offs)
1354{
1355        int ret;
1356
1357        ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1358                            OCFS2_JOURNAL_ACCESS_WRITE);
1359        if (ret) {
1360                mlog_errno(ret);
1361                goto out;
1362        }
1363
1364        xs->here->xe_name_offset = cpu_to_le16(offs);
1365        xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1366        if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1367                ocfs2_xattr_set_local(xs->here, 1);
1368        else
1369                ocfs2_xattr_set_local(xs->here, 0);
1370        ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1371
1372        ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1373        if (ret < 0)
1374                mlog_errno(ret);
1375out:
1376        return ret;
1377}
1378
1379/*
1380 * ocfs2_xattr_set_value_outside()
1381 *
1382 * Set large size value in B tree.
1383 */
1384static int ocfs2_xattr_set_value_outside(struct inode *inode,
1385                                         struct ocfs2_xattr_info *xi,
1386                                         struct ocfs2_xattr_search *xs,
1387                                         struct ocfs2_xattr_set_ctxt *ctxt,
1388                                         struct ocfs2_xattr_value_buf *vb,
1389                                         size_t offs)
1390{
1391        size_t name_len = strlen(xi->name);
1392        void *val = xs->base + offs;
1393        struct ocfs2_xattr_value_root *xv = NULL;
1394        size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1395        int ret = 0;
1396
1397        memset(val, 0, size);
1398        memcpy(val, xi->name, name_len);
1399        xv = (struct ocfs2_xattr_value_root *)
1400                (val + OCFS2_XATTR_SIZE(name_len));
1401        xv->xr_clusters = 0;
1402        xv->xr_last_eb_blk = 0;
1403        xv->xr_list.l_tree_depth = 0;
1404        xv->xr_list.l_count = cpu_to_le16(1);
1405        xv->xr_list.l_next_free_rec = 0;
1406        vb->vb_xv = xv;
1407
1408        ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1409        if (ret < 0) {
1410                mlog_errno(ret);
1411                return ret;
1412        }
1413        ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1414        if (ret < 0) {
1415                mlog_errno(ret);
1416                return ret;
1417        }
1418        ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1419                                              xi->value, xi->value_len);
1420        if (ret < 0)
1421                mlog_errno(ret);
1422
1423        return ret;
1424}
1425
1426/*
1427 * ocfs2_xattr_set_entry_local()
1428 *
1429 * Set, replace or remove extended attribute in local.
1430 */
1431static void ocfs2_xattr_set_entry_local(struct inode *inode,
1432                                        struct ocfs2_xattr_info *xi,
1433                                        struct ocfs2_xattr_search *xs,
1434                                        struct ocfs2_xattr_entry *last,
1435                                        size_t min_offs)
1436{
1437        size_t name_len = strlen(xi->name);
1438        int i;
1439
1440        if (xi->value && xs->not_found) {
1441                /* Insert the new xattr entry. */
1442                le16_add_cpu(&xs->header->xh_count, 1);
1443                ocfs2_xattr_set_type(last, xi->name_index);
1444                ocfs2_xattr_set_local(last, 1);
1445                last->xe_name_len = name_len;
1446        } else {
1447                void *first_val;
1448                void *val;
1449                size_t offs, size;
1450
1451                first_val = xs->base + min_offs;
1452                offs = le16_to_cpu(xs->here->xe_name_offset);
1453                val = xs->base + offs;
1454
1455                if (le64_to_cpu(xs->here->xe_value_size) >
1456                    OCFS2_XATTR_INLINE_SIZE)
1457                        size = OCFS2_XATTR_SIZE(name_len) +
1458                                OCFS2_XATTR_ROOT_SIZE;
1459                else
1460                        size = OCFS2_XATTR_SIZE(name_len) +
1461                        OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1462
1463                if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1464                                OCFS2_XATTR_SIZE(xi->value_len)) {
1465                        /* The old and the new value have the
1466                           same size. Just replace the value. */
1467                        ocfs2_xattr_set_local(xs->here, 1);
1468                        xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1469                        /* Clear value bytes. */
1470                        memset(val + OCFS2_XATTR_SIZE(name_len),
1471                               0,
1472                               OCFS2_XATTR_SIZE(xi->value_len));
1473                        memcpy(val + OCFS2_XATTR_SIZE(name_len),
1474                               xi->value,
1475                               xi->value_len);
1476                        return;
1477                }
1478                /* Remove the old name+value. */
1479                memmove(first_val + size, first_val, val - first_val);
1480                memset(first_val, 0, size);
1481                xs->here->xe_name_hash = 0;
1482                xs->here->xe_name_offset = 0;
1483                ocfs2_xattr_set_local(xs->here, 1);
1484                xs->here->xe_value_size = 0;
1485
1486                min_offs += size;
1487
1488                /* Adjust all value offsets. */
1489                last = xs->header->xh_entries;
1490                for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1491                        size_t o = le16_to_cpu(last->xe_name_offset);
1492
1493                        if (o < offs)
1494                                last->xe_name_offset = cpu_to_le16(o + size);
1495                        last += 1;
1496                }
1497
1498                if (!xi->value) {
1499                        /* Remove the old entry. */
1500                        last -= 1;
1501                        memmove(xs->here, xs->here + 1,
1502                                (void *)last - (void *)xs->here);
1503                        memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1504                        le16_add_cpu(&xs->header->xh_count, -1);
1505                }
1506        }
1507        if (xi->value) {
1508                /* Insert the new name+value. */
1509                size_t size = OCFS2_XATTR_SIZE(name_len) +
1510                                OCFS2_XATTR_SIZE(xi->value_len);
1511                void *val = xs->base + min_offs - size;
1512
1513                xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1514                memset(val, 0, size);
1515                memcpy(val, xi->name, name_len);
1516                memcpy(val + OCFS2_XATTR_SIZE(name_len),
1517                       xi->value,
1518                       xi->value_len);
1519                xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1520                ocfs2_xattr_set_local(xs->here, 1);
1521                ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1522        }
1523
1524        return;
1525}
1526
1527/*
1528 * ocfs2_xattr_set_entry()
1529 *
1530 * Set extended attribute entry into inode or block.
1531 *
1532 * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1533 * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1534 * then set value in B tree with set_value_outside().
1535 */
1536static int ocfs2_xattr_set_entry(struct inode *inode,
1537                                 struct ocfs2_xattr_info *xi,
1538                                 struct ocfs2_xattr_search *xs,
1539                                 struct ocfs2_xattr_set_ctxt *ctxt,
1540                                 int flag)
1541{
1542        struct ocfs2_xattr_entry *last;
1543        struct ocfs2_inode_info *oi = OCFS2_I(inode);
1544        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1545        size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1546        size_t size_l = 0;
1547        handle_t *handle = ctxt->handle;
1548        int free, i, ret;
1549        struct ocfs2_xattr_info xi_l = {
1550                .name_index = xi->name_index,
1551                .name = xi->name,
1552                .value = xi->value,
1553                .value_len = xi->value_len,
1554        };
1555        struct ocfs2_xattr_value_buf vb = {
1556                .vb_bh = xs->xattr_bh,
1557                .vb_access = ocfs2_journal_access_di,
1558        };
1559
1560        if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1561                BUG_ON(xs->xattr_bh == xs->inode_bh);
1562                vb.vb_access = ocfs2_journal_access_xb;
1563        } else
1564                BUG_ON(xs->xattr_bh != xs->inode_bh);
1565
1566        /* Compute min_offs, last and free space. */
1567        last = xs->header->xh_entries;
1568
1569        for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1570                size_t offs = le16_to_cpu(last->xe_name_offset);
1571                if (offs < min_offs)
1572                        min_offs = offs;
1573                last += 1;
1574        }
1575
1576        free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1577        if (free < 0)
1578                return -EIO;
1579
1580        if (!xs->not_found) {
1581                size_t size = 0;
1582                if (ocfs2_xattr_is_local(xs->here))
1583                        size = OCFS2_XATTR_SIZE(name_len) +
1584                        OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1585                else
1586                        size = OCFS2_XATTR_SIZE(name_len) +
1587                                OCFS2_XATTR_ROOT_SIZE;
1588                free += (size + sizeof(struct ocfs2_xattr_entry));
1589        }
1590        /* Check free space in inode or block */
1591        if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1592                if (free < sizeof(struct ocfs2_xattr_entry) +
1593                           OCFS2_XATTR_SIZE(name_len) +
1594                           OCFS2_XATTR_ROOT_SIZE) {
1595                        ret = -ENOSPC;
1596                        goto out;
1597                }
1598                size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1599                xi_l.value = (void *)&def_xv;
1600                xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1601        } else if (xi->value) {
1602                if (free < sizeof(struct ocfs2_xattr_entry) +
1603                           OCFS2_XATTR_SIZE(name_len) +
1604                           OCFS2_XATTR_SIZE(xi->value_len)) {
1605                        ret = -ENOSPC;
1606                        goto out;
1607                }
1608        }
1609
1610        if (!xs->not_found) {
1611                /* For existing extended attribute */
1612                size_t size = OCFS2_XATTR_SIZE(name_len) +
1613                        OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1614                size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1615                void *val = xs->base + offs;
1616
1617                if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1618                        /* Replace existing local xattr with tree root */
1619                        ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1620                                                            ctxt, &vb, offs);
1621                        if (ret < 0)
1622                                mlog_errno(ret);
1623                        goto out;
1624                } else if (!ocfs2_xattr_is_local(xs->here)) {
1625                        /* For existing xattr which has value outside */
1626                        vb.vb_xv = (struct ocfs2_xattr_value_root *)
1627                                (val + OCFS2_XATTR_SIZE(name_len));
1628
1629                        if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1630                                /*
1631                                 * If new value need set outside also,
1632                                 * first truncate old value to new value,
1633                                 * then set new value with set_value_outside().
1634                                 */
1635                                ret = ocfs2_xattr_value_truncate(inode,
1636                                                                 &vb,
1637                                                                 xi->value_len,
1638                                                                 ctxt);
1639                                if (ret < 0) {
1640                                        mlog_errno(ret);
1641                                        goto out;
1642                                }
1643
1644                                ret = ocfs2_xattr_update_entry(inode,
1645                                                               handle,
1646                                                               xi,
1647                                                               xs,
1648                                                               &vb,
1649                                                               offs);
1650                                if (ret < 0) {
1651                                        mlog_errno(ret);
1652                                        goto out;
1653                                }
1654
1655                                ret = __ocfs2_xattr_set_value_outside(inode,
1656                                                                handle,
1657                                                                &vb,
1658                                                                xi->value,
1659                                                                xi->value_len);
1660                                if (ret < 0)
1661                                        mlog_errno(ret);
1662                                goto out;
1663                        } else {
1664                                /*
1665                                 * If new value need set in local,
1666                                 * just trucate old value to zero.
1667                                 */
1668                                 ret = ocfs2_xattr_value_truncate(inode,
1669                                                                  &vb,
1670                                                                  0,
1671                                                                  ctxt);
1672                                if (ret < 0)
1673                                        mlog_errno(ret);
1674                        }
1675                }
1676        }
1677
1678        ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1679                                      OCFS2_JOURNAL_ACCESS_WRITE);
1680        if (ret) {
1681                mlog_errno(ret);
1682                goto out;
1683        }
1684
1685        if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1686                ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1687                                   OCFS2_JOURNAL_ACCESS_WRITE);
1688                if (ret) {
1689                        mlog_errno(ret);
1690                        goto out;
1691                }
1692        }
1693
1694        /*
1695         * Set value in local, include set tree root in local.
1696         * This is the first step for value size >INLINE_SIZE.
1697         */
1698        ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1699
1700        if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1701                ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1702                if (ret < 0) {
1703                        mlog_errno(ret);
1704                        goto out;
1705                }
1706        }
1707
1708        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1709            (flag & OCFS2_INLINE_XATTR_FL)) {
1710                struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1711                unsigned int xattrsize = osb->s_xattr_inline_size;
1712
1713                /*
1714                 * Adjust extent record count or inline data size
1715                 * to reserve space for extended attribute.
1716                 */
1717                if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1718                        struct ocfs2_inline_data *idata = &di->id2.i_data;
1719                        le16_add_cpu(&idata->id_count, -xattrsize);
1720                } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1721                        struct ocfs2_extent_list *el = &di->id2.i_list;
1722                        le16_add_cpu(&el->l_count, -(xattrsize /
1723                                        sizeof(struct ocfs2_extent_rec)));
1724                }
1725                di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1726        }
1727        /* Update xattr flag */
1728        spin_lock(&oi->ip_lock);
1729        oi->ip_dyn_features |= flag;
1730        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1731        spin_unlock(&oi->ip_lock);
1732
1733        ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1734        if (ret < 0)
1735                mlog_errno(ret);
1736
1737        if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1738                /*
1739                 * Set value outside in B tree.
1740                 * This is the second step for value size > INLINE_SIZE.
1741                 */
1742                size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1743                ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1744                                                    &vb, offs);
1745                if (ret < 0) {
1746                        int ret2;
1747
1748                        mlog_errno(ret);
1749                        /*
1750                         * If set value outside failed, we have to clean
1751                         * the junk tree root we have already set in local.
1752                         */
1753                        ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1754                                                   xi, xs, &vb, offs);
1755                        if (ret2 < 0)
1756                                mlog_errno(ret2);
1757                }
1758        }
1759out:
1760        return ret;
1761}
1762
1763/*
1764 * In xattr remove, if it is stored outside and refcounted, we may have
1765 * the chance to split the refcount tree. So need the allocators.
1766 */
1767static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
1768                                        struct ocfs2_xattr_value_root *xv,
1769                                        struct ocfs2_caching_info *ref_ci,
1770                                        struct buffer_head *ref_root_bh,
1771                                        struct ocfs2_alloc_context **meta_ac,
1772                                        int *ref_credits)
1773{
1774        int ret, meta_add = 0;
1775        u32 p_cluster, num_clusters;
1776        unsigned int ext_flags;
1777
1778        *ref_credits = 0;
1779        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
1780                                       &num_clusters,
1781                                       &xv->xr_list,
1782                                       &ext_flags);
1783        if (ret) {
1784                mlog_errno(ret);
1785                goto out;
1786        }
1787
1788        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
1789                goto out;
1790
1791        ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
1792                                                 ref_root_bh, xv,
1793                                                 &meta_add, ref_credits);
1794        if (ret) {
1795                mlog_errno(ret);
1796                goto out;
1797        }
1798
1799        ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
1800                                                meta_add, meta_ac);
1801        if (ret)
1802                mlog_errno(ret);
1803
1804out:
1805        return ret;
1806}
1807
1808static int ocfs2_remove_value_outside(struct inode*inode,
1809                                      struct ocfs2_xattr_value_buf *vb,
1810                                      struct ocfs2_xattr_header *header,
1811                                      struct ocfs2_caching_info *ref_ci,
1812                                      struct buffer_head *ref_root_bh)
1813{
1814        int ret = 0, i, ref_credits;
1815        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1816        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1817        void *val;
1818
1819        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1820
1821        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1822                struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1823
1824                if (ocfs2_xattr_is_local(entry))
1825                        continue;
1826
1827                val = (void *)header +
1828                        le16_to_cpu(entry->xe_name_offset);
1829                vb->vb_xv = (struct ocfs2_xattr_value_root *)
1830                        (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1831
1832                ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
1833                                                         ref_ci, ref_root_bh,
1834                                                         &ctxt.meta_ac,
1835                                                         &ref_credits);
1836
1837                ctxt.handle = ocfs2_start_trans(osb, ref_credits +
1838                                        ocfs2_remove_extent_credits(osb->sb));
1839                if (IS_ERR(ctxt.handle)) {
1840                        ret = PTR_ERR(ctxt.handle);
1841                        mlog_errno(ret);
1842                        break;
1843                }
1844
1845                ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1846                if (ret < 0) {
1847                        mlog_errno(ret);
1848                        break;
1849                }
1850
1851                ocfs2_commit_trans(osb, ctxt.handle);
1852                if (ctxt.meta_ac) {
1853                        ocfs2_free_alloc_context(ctxt.meta_ac);
1854                        ctxt.meta_ac = NULL;
1855                }
1856        }
1857
1858        if (ctxt.meta_ac)
1859                ocfs2_free_alloc_context(ctxt.meta_ac);
1860        ocfs2_schedule_truncate_log_flush(osb, 1);
1861        ocfs2_run_deallocs(osb, &ctxt.dealloc);
1862        return ret;
1863}
1864
1865static int ocfs2_xattr_ibody_remove(struct inode *inode,
1866                                    struct buffer_head *di_bh,
1867                                    struct ocfs2_caching_info *ref_ci,
1868                                    struct buffer_head *ref_root_bh)
1869{
1870
1871        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1872        struct ocfs2_xattr_header *header;
1873        int ret;
1874        struct ocfs2_xattr_value_buf vb = {
1875                .vb_bh = di_bh,
1876                .vb_access = ocfs2_journal_access_di,
1877        };
1878
1879        header = (struct ocfs2_xattr_header *)
1880                 ((void *)di + inode->i_sb->s_blocksize -
1881                 le16_to_cpu(di->i_xattr_inline_size));
1882
1883        ret = ocfs2_remove_value_outside(inode, &vb, header,
1884                                         ref_ci, ref_root_bh);
1885
1886        return ret;
1887}
1888
/*
 * Argument bundle handed to ocfs2_iterate_xattr_index_block() together
 * with the ocfs2_rm_xattr_cluster callback when an indexed xattr block
 * is removed.
 */
struct ocfs2_rm_xattr_bucket_para {
        /* Refcount tree caching info supplied by the remover. */
        struct ocfs2_caching_info *ref_ci;
        /* Buffer head of the refcount tree root supplied by the remover. */
        struct buffer_head *ref_root_bh;
};
1893
1894static int ocfs2_xattr_block_remove(struct inode *inode,
1895                                    struct buffer_head *blk_bh,
1896                                    struct ocfs2_caching_info *ref_ci,
1897                                    struct buffer_head *ref_root_bh)
1898{
1899        struct ocfs2_xattr_block *xb;
1900        int ret = 0;
1901        struct ocfs2_xattr_value_buf vb = {
1902                .vb_bh = blk_bh,
1903                .vb_access = ocfs2_journal_access_xb,
1904        };
1905        struct ocfs2_rm_xattr_bucket_para args = {
1906                .ref_ci = ref_ci,
1907                .ref_root_bh = ref_root_bh,
1908        };
1909
1910        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1911        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1912                struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1913                ret = ocfs2_remove_value_outside(inode, &vb, header,
1914                                                 ref_ci, ref_root_bh);
1915        } else
1916                ret = ocfs2_iterate_xattr_index_block(inode,
1917                                                blk_bh,
1918                                                ocfs2_rm_xattr_cluster,
1919                                                &args);
1920
1921        return ret;
1922}
1923
1924static int ocfs2_xattr_free_block(struct inode *inode,
1925                                  u64 block,
1926                                  struct ocfs2_caching_info *ref_ci,
1927                                  struct buffer_head *ref_root_bh)
1928{
1929        struct inode *xb_alloc_inode;
1930        struct buffer_head *xb_alloc_bh = NULL;
1931        struct buffer_head *blk_bh = NULL;
1932        struct ocfs2_xattr_block *xb;
1933        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1934        handle_t *handle;
1935        int ret = 0;
1936        u64 blk, bg_blkno;
1937        u16 bit;
1938
1939        ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1940        if (ret < 0) {
1941                mlog_errno(ret);
1942                goto out;
1943        }
1944
1945        ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
1946        if (ret < 0) {
1947                mlog_errno(ret);
1948                goto out;
1949        }
1950
1951        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1952        blk = le64_to_cpu(xb->xb_blkno);
1953        bit = le16_to_cpu(xb->xb_suballoc_bit);
1954        bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1955
1956        xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1957                                EXTENT_ALLOC_SYSTEM_INODE,
1958                                le16_to_cpu(xb->xb_suballoc_slot));
1959        if (!xb_alloc_inode) {
1960                ret = -ENOMEM;
1961                mlog_errno(ret);
1962                goto out;
1963        }
1964        mutex_lock(&xb_alloc_inode->i_mutex);
1965
1966        ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1967        if (ret < 0) {
1968                mlog_errno(ret);
1969                goto out_mutex;
1970        }
1971
1972        handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1973        if (IS_ERR(handle)) {
1974                ret = PTR_ERR(handle);
1975                mlog_errno(ret);
1976                goto out_unlock;
1977        }
1978
1979        ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1980                                       bit, bg_blkno, 1);
1981        if (ret < 0)
1982                mlog_errno(ret);
1983
1984        ocfs2_commit_trans(osb, handle);
1985out_unlock:
1986        ocfs2_inode_unlock(xb_alloc_inode, 1);
1987        brelse(xb_alloc_bh);
1988out_mutex:
1989        mutex_unlock(&xb_alloc_inode->i_mutex);
1990        iput(xb_alloc_inode);
1991out:
1992        brelse(blk_bh);
1993        return ret;
1994}
1995
1996/*
1997 * ocfs2_xattr_remove()
1998 *
1999 * Free extended attribute resources associated with this inode.
2000 */
2001int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2002{
2003        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2004        struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2005        struct ocfs2_refcount_tree *ref_tree = NULL;
2006        struct buffer_head *ref_root_bh = NULL;
2007        struct ocfs2_caching_info *ref_ci = NULL;
2008        handle_t *handle;
2009        int ret;
2010
2011        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2012                return 0;
2013
2014        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2015                return 0;
2016
2017        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2018                ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2019                                               le64_to_cpu(di->i_refcount_loc),
2020                                               1, &ref_tree, &ref_root_bh);
2021                if (ret) {
2022                        mlog_errno(ret);
2023                        goto out;
2024                }
2025                ref_ci = &ref_tree->rf_ci;
2026
2027        }
2028
2029        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2030                ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2031                                               ref_ci, ref_root_bh);
2032                if (ret < 0) {
2033                        mlog_errno(ret);
2034                        goto out;
2035                }
2036        }
2037
2038        if (di->i_xattr_loc) {
2039                ret = ocfs2_xattr_free_block(inode,
2040                                             le64_to_cpu(di->i_xattr_loc),
2041                                             ref_ci, ref_root_bh);
2042                if (ret < 0) {
2043                        mlog_errno(ret);
2044                        goto out;
2045                }
2046        }
2047
2048        handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2049                                   OCFS2_INODE_UPDATE_CREDITS);
2050        if (IS_ERR(handle)) {
2051                ret = PTR_ERR(handle);
2052                mlog_errno(ret);
2053                goto out;
2054        }
2055        ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2056                                      OCFS2_JOURNAL_ACCESS_WRITE);
2057        if (ret) {
2058                mlog_errno(ret);
2059                goto out_commit;
2060        }
2061
2062        di->i_xattr_loc = 0;
2063
2064        spin_lock(&oi->ip_lock);
2065        oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2066        di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2067        spin_unlock(&oi->ip_lock);
2068
2069        ret = ocfs2_journal_dirty(handle, di_bh);
2070        if (ret < 0)
2071                mlog_errno(ret);
2072out_commit:
2073        ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2074out:
2075        if (ref_tree)
2076                ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2077        brelse(ref_root_bh);
2078        return ret;
2079}
2080
2081static int ocfs2_xattr_has_space_inline(struct inode *inode,
2082                                        struct ocfs2_dinode *di)
2083{
2084        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2085        unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2086        int free;
2087
2088        if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2089                return 0;
2090
2091        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2092                struct ocfs2_inline_data *idata = &di->id2.i_data;
2093                free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2094        } else if (ocfs2_inode_is_fast_symlink(inode)) {
2095                free = ocfs2_fast_symlink_chars(inode->i_sb) -
2096                        le64_to_cpu(di->i_size);
2097        } else {
2098                struct ocfs2_extent_list *el = &di->id2.i_list;
2099                free = (le16_to_cpu(el->l_count) -
2100                        le16_to_cpu(el->l_next_free_rec)) *
2101                        sizeof(struct ocfs2_extent_rec);
2102        }
2103        if (free >= xattrsize)
2104                return 1;
2105
2106        return 0;
2107}
2108
2109/*
2110 * ocfs2_xattr_ibody_find()
2111 *
2112 * Find extended attribute in inode block and
2113 * fill search info into struct ocfs2_xattr_search.
2114 */
2115static int ocfs2_xattr_ibody_find(struct inode *inode,
2116                                  int name_index,
2117                                  const char *name,
2118                                  struct ocfs2_xattr_search *xs)
2119{
2120        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2121        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2122        int ret;
2123        int has_space = 0;
2124
2125        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2126                return 0;
2127
2128        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2129                down_read(&oi->ip_alloc_sem);
2130                has_space = ocfs2_xattr_has_space_inline(inode, di);
2131                up_read(&oi->ip_alloc_sem);
2132                if (!has_space)
2133                        return 0;
2134        }
2135
2136        xs->xattr_bh = xs->inode_bh;
2137        xs->end = (void *)di + inode->i_sb->s_blocksize;
2138        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2139                xs->header = (struct ocfs2_xattr_header *)
2140                        (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2141        else
2142                xs->header = (struct ocfs2_xattr_header *)
2143                        (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2144        xs->base = (void *)xs->header;
2145        xs->here = xs->header->xh_entries;
2146
2147        /* Find the named attribute. */
2148        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2149                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2150                if (ret && ret != -ENODATA)
2151                        return ret;
2152                xs->not_found = ret;
2153        }
2154
2155        return 0;
2156}
2157
2158/*
2159 * ocfs2_xattr_ibody_set()
2160 *
2161 * Set, replace or remove an extended attribute into inode block.
2162 *
2163 */
2164static int ocfs2_xattr_ibody_set(struct inode *inode,
2165                                 struct ocfs2_xattr_info *xi,
2166                                 struct ocfs2_xattr_search *xs,
2167                                 struct ocfs2_xattr_set_ctxt *ctxt)
2168{
2169        struct ocfs2_inode_info *oi = OCFS2_I(inode);
2170        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2171        int ret;
2172
2173        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2174                return -ENOSPC;
2175
2176        down_write(&oi->ip_alloc_sem);
2177        if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2178                if (!ocfs2_xattr_has_space_inline(inode, di)) {
2179                        ret = -ENOSPC;
2180                        goto out;
2181                }
2182        }
2183
2184        ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2185                                (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2186out:
2187        up_write(&oi->ip_alloc_sem);
2188
2189        return ret;
2190}
2191
2192/*
2193 * ocfs2_xattr_block_find()
2194 *
2195 * Find extended attribute in external block and
2196 * fill search info into struct ocfs2_xattr_search.
2197 */
2198static int ocfs2_xattr_block_find(struct inode *inode,
2199                                  int name_index,
2200                                  const char *name,
2201                                  struct ocfs2_xattr_search *xs)
2202{
2203        struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2204        struct buffer_head *blk_bh = NULL;
2205        struct ocfs2_xattr_block *xb;
2206        int ret = 0;
2207
2208        if (!di->i_xattr_loc)
2209                return ret;
2210
2211        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2212                                     &blk_bh);
2213        if (ret < 0) {
2214                mlog_errno(ret);
2215                return ret;
2216        }
2217
2218        xs->xattr_bh = blk_bh;
2219        xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2220
2221        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2222                xs->header = &xb->xb_attrs.xb_header;
2223                xs->base = (void *)xs->header;
2224                xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2225                xs->here = xs->header->xh_entries;
2226
2227                ret = ocfs2_xattr_find_entry(name_index, name, xs);
2228        } else
2229                ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2230                                                   name_index,
2231                                                   name, xs);
2232
2233        if (ret && ret != -ENODATA) {
2234                xs->xattr_bh = NULL;
2235                goto cleanup;
2236        }
2237        xs->not_found = ret;
2238        return 0;
2239cleanup:
2240        brelse(blk_bh);
2241
2242        return ret;
2243}
2244
2245static int ocfs2_create_xattr_block(handle_t *handle,
2246                                    struct inode *inode,
2247                                    struct buffer_head *inode_bh,
2248                                    struct ocfs2_alloc_context *meta_ac,
2249                                    struct buffer_head **ret_bh,
2250                                    int indexed)
2251{
2252        int ret;
2253        u16 suballoc_bit_start;
2254        u32 num_got;
2255        u64 first_blkno;
2256        struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2257        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2258        struct buffer_head *new_bh = NULL;
2259        struct ocfs2_xattr_block *xblk;
2260
2261        ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2262                                      OCFS2_JOURNAL_ACCESS_CREATE);
2263        if (ret < 0) {
2264                mlog_errno(ret);
2265                goto end;
2266        }
2267
2268        ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2269                                   &suballoc_bit_start, &num_got,
2270                                   &first_blkno);
2271        if (ret < 0) {
2272                mlog_errno(ret);
2273                goto end;
2274        }
2275
2276        new_bh = sb_getblk(inode->i_sb, first_blkno);
2277        ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2278
2279        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2280                                      new_bh,
2281                                      OCFS2_JOURNAL_ACCESS_CREATE);
2282        if (ret < 0) {
2283                mlog_errno(ret);
2284                goto end;
2285        }
2286
2287        /* Initialize ocfs2_xattr_block */
2288        xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2289        memset(xblk, 0, inode->i_sb->s_blocksize);
2290        strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2291        xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2292        xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2293        xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2294        xblk->xb_blkno = cpu_to_le64(first_blkno);
2295
2296        if (indexed) {
2297                struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2298                xr->xt_clusters = cpu_to_le32(1);
2299                xr->xt_last_eb_blk = 0;
2300                xr->xt_list.l_tree_depth = 0;
2301                xr->xt_list.l_count = cpu_to_le16(
2302                                        ocfs2_xattr_recs_per_xb(inode->i_sb));
2303                xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2304                xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2305        }
2306
2307        ret = ocfs2_journal_dirty(handle, new_bh);
2308        if (ret < 0) {
2309                mlog_errno(ret);
2310                goto end;
2311        }
2312        di->i_xattr_loc = cpu_to_le64(first_blkno);
2313        ocfs2_journal_dirty(handle, inode_bh);
2314
2315        *ret_bh = new_bh;
2316        new_bh = NULL;
2317
2318end:
2319        brelse(new_bh);
2320        return ret;
2321}
2322
2323/*
2324 * ocfs2_xattr_block_set()
2325 *
2326 * Set, replace or remove an extended attribute into external block.
2327 *
2328 */
2329static int ocfs2_xattr_block_set(struct inode *inode,
2330                                 struct ocfs2_xattr_info *xi,
2331                                 struct ocfs2_xattr_search *xs,
2332                                 struct ocfs2_xattr_set_ctxt *ctxt)
2333{
2334        struct buffer_head *new_bh = NULL;
2335        handle_t *handle = ctxt->handle;
2336        struct ocfs2_xattr_block *xblk = NULL;
2337        int ret;
2338
2339        if (!xs->xattr_bh) {
2340                ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2341                                               ctxt->meta_ac, &new_bh, 0);
2342                if (ret) {
2343                        mlog_errno(ret);
2344                        goto end;
2345                }
2346
2347                xs->xattr_bh = new_bh;
2348                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2349                xs->header = &xblk->xb_attrs.xb_header;
2350                xs->base = (void *)xs->header;
2351                xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2352                xs->here = xs->header->xh_entries;
2353        } else
2354                xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2355
2356        if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2357                /* Set extended attribute into external block */
2358                ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2359                                            OCFS2_HAS_XATTR_FL);
2360                if (!ret || ret != -ENOSPC)
2361                        goto end;
2362
2363                ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2364                if (ret)
2365                        goto end;
2366        }
2367
2368        ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2369
2370end:
2371
2372        return ret;
2373}
2374
2375/* Check whether the new xattr can be inserted into the inode. */
2376static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2377                                       struct ocfs2_xattr_info *xi,
2378                                       struct ocfs2_xattr_search *xs)
2379{
2380        u64 value_size;
2381        struct ocfs2_xattr_entry *last;
2382        int free, i;
2383        size_t min_offs = xs->end - xs->base;
2384
2385        if (!xs->header)
2386                return 0;
2387
2388        last = xs->header->xh_entries;
2389
2390        for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2391                size_t offs = le16_to_cpu(last->xe_name_offset);
2392                if (offs < min_offs)
2393                        min_offs = offs;
2394                last += 1;
2395        }
2396
2397        free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2398        if (free < 0)
2399                return 0;
2400
2401        BUG_ON(!xs->not_found);
2402
2403        if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2404                value_size = OCFS2_XATTR_ROOT_SIZE;
2405        else
2406                value_size = OCFS2_XATTR_SIZE(xi->value_len);
2407
2408        if (free >= sizeof(struct ocfs2_xattr_entry) +
2409                   OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2410                return 1;
2411
2412        return 0;
2413}
2414
2415static int ocfs2_calc_xattr_set_need(struct inode *inode,
2416                                     struct ocfs2_dinode *di,
2417                                     struct ocfs2_xattr_info *xi,
2418                                     struct ocfs2_xattr_search *xis,
2419                                     struct ocfs2_xattr_search *xbs,
2420                                     int *clusters_need,
2421                                     int *meta_need,
2422                                     int *credits_need)
2423{
2424        int ret = 0, old_in_xb = 0;
2425        int clusters_add = 0, meta_add = 0, credits = 0;
2426        struct buffer_head *bh = NULL;
2427        struct ocfs2_xattr_block *xb = NULL;
2428        struct ocfs2_xattr_entry *xe = NULL;
2429        struct ocfs2_xattr_value_root *xv = NULL;
2430        char *base = NULL;
2431        int name_offset, name_len = 0;
2432        u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2433                                                    xi->value_len);
2434        u64 value_size;
2435
2436        /*
2437         * Calculate the clusters we need to write.
2438         * No matter whether we replace an old one or add a new one,
2439         * we need this for writing.
2440         */
2441        if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2442                credits += new_clusters *
2443                           ocfs2_clusters_to_blocks(inode->i_sb, 1);
2444
2445        if (xis->not_found && xbs->not_found) {
2446                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2447
2448                if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2449                        clusters_add += new_clusters;
2450                        credits += ocfs2_calc_extend_credits(inode->i_sb,
2451                                                        &def_xv.xv.xr_list,
2452                                                        new_clusters);
2453                }
2454
2455                goto meta_guess;
2456        }
2457
2458        if (!xis->not_found) {
2459                xe = xis->here;
2460                name_offset = le16_to_cpu(xe->xe_name_offset);
2461                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2462                base = xis->base;
2463                credits += OCFS2_INODE_UPDATE_CREDITS;
2464        } else {
2465                int i, block_off = 0;
2466                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2467                xe = xbs->here;
2468                name_offset = le16_to_cpu(xe->xe_name_offset);
2469                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2470                i = xbs->here - xbs->header->xh_entries;
2471                old_in_xb = 1;
2472
2473                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2474                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2475                                                        bucket_xh(xbs->bucket),
2476                                                        i, &block_off,
2477                                                        &name_offset);
2478                        base = bucket_block(xbs->bucket, block_off);
2479                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2480                } else {
2481                        base = xbs->base;
2482                        credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2483                }
2484        }
2485
2486        /*
2487         * delete a xattr doesn't need metadata and cluster allocation.
2488         * so just calculate the credits and return.
2489         *
2490         * The credits for removing the value tree will be extended
2491         * by ocfs2_remove_extent itself.
2492         */
2493        if (!xi->value) {
2494                if (!ocfs2_xattr_is_local(xe))
2495                        credits += ocfs2_remove_extent_credits(inode->i_sb);
2496
2497                goto out;
2498        }
2499
2500        /* do cluster allocation guess first. */
2501        value_size = le64_to_cpu(xe->xe_value_size);
2502
2503        if (old_in_xb) {
2504                /*
2505                 * In xattr set, we always try to set the xe in inode first,
2506                 * so if it can be inserted into inode successfully, the old
2507                 * one will be removed from the xattr block, and this xattr
2508                 * will be inserted into inode as a new xattr in inode.
2509                 */
2510                if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2511                        clusters_add += new_clusters;
2512                        credits += ocfs2_remove_extent_credits(inode->i_sb) +
2513                                    OCFS2_INODE_UPDATE_CREDITS;
2514                        if (!ocfs2_xattr_is_local(xe))
2515                                credits += ocfs2_calc_extend_credits(
2516                                                        inode->i_sb,
2517                                                        &def_xv.xv.xr_list,
2518                                                        new_clusters);
2519                        goto out;
2520                }
2521        }
2522
2523        if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2524                /* the new values will be stored outside. */
2525                u32 old_clusters = 0;
2526
2527                if (!ocfs2_xattr_is_local(xe)) {
2528                        old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2529                                                                 value_size);
2530                        xv = (struct ocfs2_xattr_value_root *)
2531                             (base + name_offset + name_len);
2532                        value_size = OCFS2_XATTR_ROOT_SIZE;
2533                } else
2534                        xv = &def_xv.xv;
2535
2536                if (old_clusters >= new_clusters) {
2537                        credits += ocfs2_remove_extent_credits(inode->i_sb);
2538                        goto out;
2539                } else {
2540                        meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2541                        clusters_add += new_clusters - old_clusters;
2542                        credits += ocfs2_calc_extend_credits(inode->i_sb,
2543                                                             &xv->xr_list,
2544                                                             new_clusters -
2545                                                             old_clusters);
2546                        if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2547                                goto out;
2548                }
2549        } else {
2550                /*
2551                 * Now the new value will be stored inside. So if the new
2552                 * value is smaller than the size of value root or the old
2553                 * value, we don't need any allocation, otherwise we have
2554                 * to guess metadata allocation.
2555                 */
2556                if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2557                    (!ocfs2_xattr_is_local(xe) &&
2558                     OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2559                        goto out;
2560        }
2561
2562meta_guess:
2563        /* calculate metadata allocation. */
2564        if (di->i_xattr_loc) {
2565                if (!xbs->xattr_bh) {
2566                        ret = ocfs2_read_xattr_block(inode,
2567                                                     le64_to_cpu(di->i_xattr_loc),
2568                                                     &bh);
2569                        if (ret) {
2570                                mlog_errno(ret);
2571                                goto out;
2572                        }
2573
2574                        xb = (struct ocfs2_xattr_block *)bh->b_data;
2575                } else
2576                        xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2577
2578                /*
2579                 * If there is already an xattr tree, good, we can calculate
2580                 * like other b-trees. Otherwise we may have the chance of
2581                 * create a tree, the credit calculation is borrowed from
2582                 * ocfs2_calc_extend_credits with root_el = NULL. And the
2583                 * new tree will be cluster based, so no meta is needed.
2584                 */
2585                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2586                        struct ocfs2_extent_list *el =
2587                                 &xb->xb_attrs.xb_root.xt_list;
2588                        meta_add += ocfs2_extend_meta_needed(el);
2589                        credits += ocfs2_calc_extend_credits(inode->i_sb,
2590                                                             el, 1);
2591                } else
2592                        credits += OCFS2_SUBALLOC_ALLOC + 1;
2593
2594                /*
2595                 * This cluster will be used either for new bucket or for
2596                 * new xattr block.
2597                 * If the cluster size is the same as the bucket size, one
2598                 * more is needed since we may need to extend the bucket
2599                 * also.
2600                 */
2601                clusters_add += 1;
2602                credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2603                if (OCFS2_XATTR_BUCKET_SIZE ==
2604                        OCFS2_SB(inode->i_sb)->s_clustersize) {
2605                        credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2606                        clusters_add += 1;
2607                }
2608        } else {
2609                meta_add += 1;
2610                credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2611        }
2612out:
2613        if (clusters_need)
2614                *clusters_need = clusters_add;
2615        if (meta_need)
2616                *meta_need = meta_add;
2617        if (credits_need)
2618                *credits_need = credits;
2619        brelse(bh);
2620        return ret;
2621}
2622
2623static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2624                                     struct ocfs2_dinode *di,
2625                                     struct ocfs2_xattr_info *xi,
2626                                     struct ocfs2_xattr_search *xis,
2627                                     struct ocfs2_xattr_search *xbs,
2628                                     struct ocfs2_xattr_set_ctxt *ctxt,
2629                                     int extra_meta,
2630                                     int *credits)
2631{
2632        int clusters_add, meta_add, ret;
2633        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2634
2635        memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2636
2637        ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2638
2639        ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2640                                        &clusters_add, &meta_add, credits);
2641        if (ret) {
2642                mlog_errno(ret);
2643                return ret;
2644        }
2645
2646        meta_add += extra_meta;
2647        mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2648             "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2649
2650        if (meta_add) {
2651                ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2652                                                        &ctxt->meta_ac);
2653                if (ret) {
2654                        mlog_errno(ret);
2655                        goto out;
2656                }
2657        }
2658
2659        if (clusters_add) {
2660                ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2661                if (ret)
2662                        mlog_errno(ret);
2663        }
2664out:
2665        if (ret) {
2666                if (ctxt->meta_ac) {
2667                        ocfs2_free_alloc_context(ctxt->meta_ac);
2668                        ctxt->meta_ac = NULL;
2669                }
2670
2671                /*
2672                 * We cannot have an error and a non null ctxt->data_ac.
2673                 */
2674        }
2675
2676        return ret;
2677}
2678
2679static int __ocfs2_xattr_set_handle(struct inode *inode,
2680                                    struct ocfs2_dinode *di,
2681                                    struct ocfs2_xattr_info *xi,
2682                                    struct ocfs2_xattr_search *xis,
2683                                    struct ocfs2_xattr_search *xbs,
2684                                    struct ocfs2_xattr_set_ctxt *ctxt)
2685{
2686        int ret = 0, credits, old_found;
2687
2688        if (!xi->value) {
2689                /* Remove existing extended attribute */
2690                if (!xis->not_found)
2691                        ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2692                else if (!xbs->not_found)
2693                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2694        } else {
2695                /* We always try to set extended attribute into inode first*/
2696                ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2697                if (!ret && !xbs->not_found) {
2698                        /*
2699                         * If succeed and that extended attribute existing in
2700                         * external block, then we will remove it.
2701                         */
2702                        xi->value = NULL;
2703                        xi->value_len = 0;
2704
2705                        old_found = xis->not_found;
2706                        xis->not_found = -ENODATA;
2707                        ret = ocfs2_calc_xattr_set_need(inode,
2708                                                        di,
2709                                                        xi,
2710                                                        xis,
2711                                                        xbs,
2712                                                        NULL,
2713                                                        NULL,
2714                                                        &credits);
2715                        xis->not_found = old_found;
2716                        if (ret) {
2717                                mlog_errno(ret);
2718                                goto out;
2719                        }
2720
2721                        ret = ocfs2_extend_trans(ctxt->handle, credits +
2722                                        ctxt->handle->h_buffer_credits);
2723                        if (ret) {
2724                                mlog_errno(ret);
2725                                goto out;
2726                        }
2727                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2728                } else if (ret == -ENOSPC) {
2729                        if (di->i_xattr_loc && !xbs->xattr_bh) {
2730                                ret = ocfs2_xattr_block_find(inode,
2731                                                             xi->name_index,
2732                                                             xi->name, xbs);
2733                                if (ret)
2734                                        goto out;
2735
2736                                old_found = xis->not_found;
2737                                xis->not_found = -ENODATA;
2738                                ret = ocfs2_calc_xattr_set_need(inode,
2739                                                                di,
2740                                                                xi,
2741                                                                xis,
2742                                                                xbs,
2743                                                                NULL,
2744                                                                NULL,
2745                                                                &credits);
2746                                xis->not_found = old_found;
2747                                if (ret) {
2748                                        mlog_errno(ret);
2749                                        goto out;
2750                                }
2751
2752                                ret = ocfs2_extend_trans(ctxt->handle, credits +
2753                                        ctxt->handle->h_buffer_credits);
2754                                if (ret) {
2755                                        mlog_errno(ret);
2756                                        goto out;
2757                                }
2758                        }
2759                        /*
2760                         * If no space in inode, we will set extended attribute
2761                         * into external block.
2762                         */
2763                        ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2764                        if (ret)
2765                                goto out;
2766                        if (!xis->not_found) {
2767                                /*
2768                                 * If succeed and that extended attribute
2769                                 * existing in inode, we will remove it.
2770                                 */
2771                                xi->value = NULL;
2772                                xi->value_len = 0;
2773                                xbs->not_found = -ENODATA;
2774                                ret = ocfs2_calc_xattr_set_need(inode,
2775                                                                di,
2776                                                                xi,
2777                                                                xis,
2778                                                                xbs,
2779                                                                NULL,
2780                                                                NULL,
2781                                                                &credits);
2782                                if (ret) {
2783                                        mlog_errno(ret);
2784                                        goto out;
2785                                }
2786
2787                                ret = ocfs2_extend_trans(ctxt->handle, credits +
2788                                                ctxt->handle->h_buffer_credits);
2789                                if (ret) {
2790                                        mlog_errno(ret);
2791                                        goto out;
2792                                }
2793                                ret = ocfs2_xattr_ibody_set(inode, xi,
2794                                                            xis, ctxt);
2795                        }
2796                }
2797        }
2798
2799        if (!ret) {
2800                /* Update inode ctime. */
2801                ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2802                                              xis->inode_bh,
2803                                              OCFS2_JOURNAL_ACCESS_WRITE);
2804                if (ret) {
2805                        mlog_errno(ret);
2806                        goto out;
2807                }
2808
2809                inode->i_ctime = CURRENT_TIME;
2810                di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2811                di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2812                ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2813        }
2814out:
2815        return ret;
2816}
2817
2818/*
2819 * This function only called duing creating inode
2820 * for init security/acl xattrs of the new inode.
2821 * All transanction credits have been reserved in mknod.
2822 */
2823int ocfs2_xattr_set_handle(handle_t *handle,
2824                           struct inode *inode,
2825                           struct buffer_head *di_bh,
2826                           int name_index,
2827                           const char *name,
2828                           const void *value,
2829                           size_t value_len,
2830                           int flags,
2831                           struct ocfs2_alloc_context *meta_ac,
2832                           struct ocfs2_alloc_context *data_ac)
2833{
2834        struct ocfs2_dinode *di;
2835        int ret;
2836
2837        struct ocfs2_xattr_info xi = {
2838                .name_index = name_index,
2839                .name = name,
2840                .value = value,
2841                .value_len = value_len,
2842        };
2843
2844        struct ocfs2_xattr_search xis = {
2845                .not_found = -ENODATA,
2846        };
2847
2848        struct ocfs2_xattr_search xbs = {
2849                .not_found = -ENODATA,
2850        };
2851
2852        struct ocfs2_xattr_set_ctxt ctxt = {
2853                .handle = handle,
2854                .meta_ac = meta_ac,
2855                .data_ac = data_ac,
2856        };
2857
2858        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2859                return -EOPNOTSUPP;
2860
2861        /*
2862         * In extreme situation, may need xattr bucket when
2863         * block size is too small. And we have already reserved
2864         * the credits for bucket in mknod.
2865         */
2866        if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2867                xbs.bucket = ocfs2_xattr_bucket_new(inode);
2868                if (!xbs.bucket) {
2869                        mlog_errno(-ENOMEM);
2870                        return -ENOMEM;
2871                }
2872        }
2873
2874        xis.inode_bh = xbs.inode_bh = di_bh;
2875        di = (struct ocfs2_dinode *)di_bh->b_data;
2876
2877        down_write(&OCFS2_I(inode)->ip_xattr_sem);
2878
2879        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2880        if (ret)
2881                goto cleanup;
2882        if (xis.not_found) {
2883                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2884                if (ret)
2885                        goto cleanup;
2886        }
2887
2888        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2889
2890cleanup:
2891        up_write(&OCFS2_I(inode)->ip_xattr_sem);
2892        brelse(xbs.xattr_bh);
2893        ocfs2_xattr_bucket_free(xbs.bucket);
2894
2895        return ret;
2896}
2897
2898/*
2899 * ocfs2_xattr_set()
2900 *
2901 * Set, replace or remove an extended attribute for this inode.
2902 * value is NULL to remove an existing extended attribute, else either
2903 * create or replace an extended attribute.
2904 */
2905int ocfs2_xattr_set(struct inode *inode,
2906                    int name_index,
2907                    const char *name,
2908                    const void *value,
2909                    size_t value_len,
2910                    int flags)
2911{
2912        struct buffer_head *di_bh = NULL;
2913        struct ocfs2_dinode *di;
2914        int ret, credits, ref_meta = 0, ref_credits = 0;
2915        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2916        struct inode *tl_inode = osb->osb_tl_inode;
2917        struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2918        struct ocfs2_refcount_tree *ref_tree = NULL;
2919
2920        struct ocfs2_xattr_info xi = {
2921                .name_index = name_index,
2922                .name = name,
2923                .value = value,
2924                .value_len = value_len,
2925        };
2926
2927        struct ocfs2_xattr_search xis = {
2928                .not_found = -ENODATA,
2929        };
2930
2931        struct ocfs2_xattr_search xbs = {
2932                .not_found = -ENODATA,
2933        };
2934
2935        if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2936                return -EOPNOTSUPP;
2937
2938        /*
2939         * Only xbs will be used on indexed trees.  xis doesn't need a
2940         * bucket.
2941         */
2942        xbs.bucket = ocfs2_xattr_bucket_new(inode);
2943        if (!xbs.bucket) {
2944                mlog_errno(-ENOMEM);
2945                return -ENOMEM;
2946        }
2947
2948        ret = ocfs2_inode_lock(inode, &di_bh, 1);
2949        if (ret < 0) {
2950                mlog_errno(ret);
2951                goto cleanup_nolock;
2952        }
2953        xis.inode_bh = xbs.inode_bh = di_bh;
2954        di = (struct ocfs2_dinode *)di_bh->b_data;
2955
2956        down_write(&OCFS2_I(inode)->ip_xattr_sem);
2957        /*
2958         * Scan inode and external block to find the same name
2959         * extended attribute and collect search infomation.
2960         */
2961        ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2962        if (ret)
2963                goto cleanup;
2964        if (xis.not_found) {
2965                ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2966                if (ret)
2967                        goto cleanup;
2968        }
2969
2970        if (xis.not_found && xbs.not_found) {
2971                ret = -ENODATA;
2972                if (flags & XATTR_REPLACE)
2973                        goto cleanup;
2974                ret = 0;
2975                if (!value)
2976                        goto cleanup;
2977        } else {
2978                ret = -EEXIST;
2979                if (flags & XATTR_CREATE)
2980                        goto cleanup;
2981        }
2982
2983        /* Check whether the value is refcounted and do some prepartion. */
2984        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
2985            (!xis.not_found || !xbs.not_found)) {
2986                ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
2987                                                   &xis, &xbs, &ref_tree,
2988                                                   &ref_meta, &ref_credits);
2989                if (ret) {
2990                        mlog_errno(ret);
2991                        goto cleanup;
2992                }
2993        }
2994
2995        mutex_lock(&tl_inode->i_mutex);
2996
2997        if (ocfs2_truncate_log_needs_flush(osb)) {
2998                ret = __ocfs2_flush_truncate_log(osb);
2999                if (ret < 0) {
3000                        mutex_unlock(&tl_inode->i_mutex);
3001                        mlog_errno(ret);
3002                        goto cleanup;
3003                }
3004        }
3005        mutex_unlock(&tl_inode->i_mutex);
3006
3007        ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3008                                        &xbs, &ctxt, ref_meta, &credits);
3009        if (ret) {
3010                mlog_errno(ret);
3011                goto cleanup;
3012        }
3013
3014        /* we need to update inode's ctime field, so add credit for it. */
3015        credits += OCFS2_INODE_UPDATE_CREDITS;
3016        ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3017        if (IS_ERR(ctxt.handle)) {
3018                ret = PTR_ERR(ctxt.handle);
3019                mlog_errno(ret);
3020                goto cleanup;
3021        }
3022
3023        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3024
3025        ocfs2_commit_trans(osb, ctxt.handle);
3026
3027        if (ctxt.data_ac)
3028                ocfs2_free_alloc_context(ctxt.data_ac);
3029        if (ctxt.meta_ac)
3030                ocfs2_free_alloc_context(ctxt.meta_ac);
3031        if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3032                ocfs2_schedule_truncate_log_flush(osb, 1);
3033        ocfs2_run_deallocs(osb, &ctxt.dealloc);
3034
3035cleanup:
3036        if (ref_tree)
3037                ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3038        up_write(&OCFS2_I(inode)->ip_xattr_sem);
3039        if (!value && !ret) {
3040                ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3041                if (ret)
3042                        mlog_errno(ret);
3043        }
3044        ocfs2_inode_unlock(inode, 1);
3045cleanup_nolock:
3046        brelse(di_bh);
3047        brelse(xbs.xattr_bh);
3048        ocfs2_xattr_bucket_free(xbs.bucket);
3049
3050        return ret;
3051}
3052
3053/*
3054 * Find the xattr extent rec which may contains name_hash.
3055 * e_cpos will be the first name hash of the xattr rec.
3056 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3057 */
3058static int ocfs2_xattr_get_rec(struct inode *inode,
3059                               u32 name_hash,
3060                               u64 *p_blkno,
3061                               u32 *e_cpos,
3062                               u32 *num_clusters,
3063                               struct ocfs2_extent_list *el)
3064{
3065        int ret = 0, i;
3066        struct buffer_head *eb_bh = NULL;
3067        struct ocfs2_extent_block *eb;
3068        struct ocfs2_extent_rec *rec = NULL;
3069        u64 e_blkno = 0;
3070
3071        if (el->l_tree_depth) {
3072                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3073                                      &eb_bh);
3074                if (ret) {
3075                        mlog_errno(ret);
3076                        goto out;
3077                }
3078
3079                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3080                el = &eb->h_list;
3081
3082                if (el->l_tree_depth) {
3083                        ocfs2_error(inode->i_sb,
3084                                    "Inode %lu has non zero tree depth in "
3085                                    "xattr tree block %llu\n", inode->i_ino,
3086                                    (unsigned long long)eb_bh->b_blocknr);
3087                        ret = -EROFS;
3088                        goto out;
3089                }
3090        }
3091
3092        for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3093                rec = &el->l_recs[i];
3094
3095                if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3096                        e_blkno = le64_to_cpu(rec->e_blkno);
3097                        break;
3098                }
3099        }
3100
3101        if (!e_blkno) {
3102                ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3103                            "record (%u, %u, 0) in xattr", inode->i_ino,
3104                            le32_to_cpu(rec->e_cpos),
3105                            ocfs2_rec_clusters(el, rec));
3106                ret = -EROFS;
3107                goto out;
3108        }
3109
3110        *p_blkno = le64_to_cpu(rec->e_blkno);
3111        *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3112        if (e_cpos)
3113                *e_cpos = le32_to_cpu(rec->e_cpos);
3114out:
3115        brelse(eb_bh);
3116        return ret;
3117}
3118
/*
 * Per-bucket callback type; ocfs2_iterate_xattr_buckets() invokes it as
 * func(inode, bucket, para) and stops iterating on a non-zero return.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket, void *para);
3122
3123static int ocfs2_find_xe_in_bucket(struct inode *inode,
3124                                   struct ocfs2_xattr_bucket *bucket,
3125                                   int name_index,
3126                                   const char *name,
3127                                   u32 name_hash,
3128                                   u16 *xe_index,
3129                                   int *found)
3130{
3131        int i, ret = 0, cmp = 1, block_off, new_offset;
3132        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3133        size_t name_len = strlen(name);
3134        struct ocfs2_xattr_entry *xe = NULL;
3135        char *xe_name;
3136
3137        /*
3138         * We don't use binary search in the bucket because there
3139         * may be multiple entries with the same name hash.
3140         */
3141        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3142                xe = &xh->xh_entries[i];
3143
3144                if (name_hash > le32_to_cpu(xe->xe_name_hash))
3145                        continue;
3146                else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3147                        break;
3148
3149                cmp = name_index - ocfs2_xattr_get_type(xe);
3150                if (!cmp)
3151                        cmp = name_len - xe->xe_name_len;
3152                if (cmp)
3153                        continue;
3154
3155                ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3156                                                        xh,
3157                                                        i,
3158                                                        &block_off,
3159                                                        &new_offset);
3160                if (ret) {
3161                        mlog_errno(ret);
3162                        break;
3163                }
3164
3165
3166                xe_name = bucket_block(bucket, block_off) + new_offset;
3167                if (!memcmp(name, xe_name, name_len)) {
3168                        *xe_index = i;
3169                        *found = 1;
3170                        ret = 0;
3171                        break;
3172                }
3173        }
3174
3175        return ret;
3176}
3177
3178/*
3179 * Find the specified xattr entry in a series of buckets.
3180 * This series start from p_blkno and last for num_clusters.
3181 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3182 * the num of the valid buckets.
3183 *
3184 * Return the buffer_head this xattr should reside in. And if the xattr's
3185 * hash is in the gap of 2 buckets, return the lower bucket.
3186 */
3187static int ocfs2_xattr_bucket_find(struct inode *inode,
3188                                   int name_index,
3189                                   const char *name,
3190                                   u32 name_hash,
3191                                   u64 p_blkno,
3192                                   u32 first_hash,
3193                                   u32 num_clusters,
3194                                   struct ocfs2_xattr_search *xs)
3195{
3196        int ret, found = 0;
3197        struct ocfs2_xattr_header *xh = NULL;
3198        struct ocfs2_xattr_entry *xe = NULL;
3199        u16 index = 0;
3200        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3201        int low_bucket = 0, bucket, high_bucket;
3202        struct ocfs2_xattr_bucket *search;
3203        u32 last_hash;
3204        u64 blkno, lower_blkno = 0;
3205
3206        search = ocfs2_xattr_bucket_new(inode);
3207        if (!search) {
3208                ret = -ENOMEM;
3209                mlog_errno(ret);
3210                goto out;
3211        }
3212
3213        ret = ocfs2_read_xattr_bucket(search, p_blkno);
3214        if (ret) {
3215                mlog_errno(ret);
3216                goto out;
3217        }
3218
3219        xh = bucket_xh(search);
3220        high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3221        while (low_bucket <= high_bucket) {
3222                ocfs2_xattr_bucket_relse(search);
3223
3224                bucket = (low_bucket + high_bucket) / 2;
3225                blkno = p_blkno + bucket * blk_per_bucket;
3226                ret = ocfs2_read_xattr_bucket(search, blkno);
3227                if (ret) {
3228                        mlog_errno(ret);
3229                        goto out;
3230                }
3231
3232                xh = bucket_xh(search);
3233                xe = &xh->xh_entries[0];
3234                if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3235                        high_bucket = bucket - 1;
3236                        continue;
3237                }
3238
3239                /*
3240                 * Check whether the hash of the last entry in our
3241                 * bucket is larger than the search one. for an empty
3242                 * bucket, the last one is also the first one.
3243                 */
3244                if (xh->xh_count)
3245                        xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3246
3247                last_hash = le32_to_cpu(xe->xe_name_hash);
3248
3249                /* record lower_blkno which may be the insert place. */
3250                lower_blkno = blkno;
3251
3252                if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3253                        low_bucket = bucket + 1;
3254                        continue;
3255                }
3256
3257                /* the searched xattr should reside in this bucket if exists. */
3258                ret = ocfs2_find_xe_in_bucket(inode, search,
3259                                              name_index, name, name_hash,
3260                                              &index, &found);
3261                if (ret) {
3262                        mlog_errno(ret);
3263                        goto out;
3264                }
3265                break;
3266        }
3267
3268        /*
3269         * Record the bucket we have found.
3270         * When the xattr's hash value is in the gap of 2 buckets, we will
3271         * always set it to the previous bucket.
3272         */
3273        if (!lower_blkno)
3274                lower_blkno = p_blkno;
3275
3276        /* This should be in cache - we just read it during the search */
3277        ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3278        if (ret) {
3279                mlog_errno(ret);
3280                goto out;
3281        }
3282
3283        xs->header = bucket_xh(xs->bucket);
3284        xs->base = bucket_block(xs->bucket, 0);
3285        xs->end = xs->base + inode->i_sb->s_blocksize;
3286
3287        if (found) {
3288                xs->here = &xs->header->xh_entries[index];
3289                mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3290                     (unsigned long long)bucket_blkno(xs->bucket), index);
3291        } else
3292                ret = -ENODATA;
3293
3294out:
3295        ocfs2_xattr_bucket_free(search);
3296        return ret;
3297}
3298
3299static int ocfs2_xattr_index_block_find(struct inode *inode,
3300                                        struct buffer_head *root_bh,
3301                                        int name_index,
3302                                        const char *name,
3303                                        struct ocfs2_xattr_search *xs)
3304{
3305        int ret;
3306        struct ocfs2_xattr_block *xb =
3307                        (struct ocfs2_xattr_block *)root_bh->b_data;
3308        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3309        struct ocfs2_extent_list *el = &xb_root->xt_list;
3310        u64 p_blkno = 0;
3311        u32 first_hash, num_clusters = 0;
3312        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3313
3314        if (le16_to_cpu(el->l_next_free_rec) == 0)
3315                return -ENODATA;
3316
3317        mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3318             name, name_hash, name_index);
3319
3320        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3321                                  &num_clusters, el);
3322        if (ret) {
3323                mlog_errno(ret);
3324                goto out;
3325        }
3326
3327        BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3328
3329        mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3330             "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3331             first_hash);
3332
3333        ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3334                                      p_blkno, first_hash, num_clusters, xs);
3335
3336out:
3337        return ret;
3338}
3339
3340static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3341                                       u64 blkno,
3342                                       u32 clusters,
3343                                       xattr_bucket_func *func,
3344                                       void *para)
3345{
3346        int i, ret = 0;
3347        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3348        u32 num_buckets = clusters * bpc;
3349        struct ocfs2_xattr_bucket *bucket;
3350
3351        bucket = ocfs2_xattr_bucket_new(inode);
3352        if (!bucket) {
3353                mlog_errno(-ENOMEM);
3354                return -ENOMEM;
3355        }
3356
3357        mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3358             clusters, (unsigned long long)blkno);
3359
3360        for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3361                ret = ocfs2_read_xattr_bucket(bucket, blkno);
3362                if (ret) {
3363                        mlog_errno(ret);
3364                        break;
3365                }
3366
3367                /*
3368                 * The real bucket num in this series of blocks is stored
3369                 * in the 1st bucket.
3370                 */
3371                if (i == 0)
3372                        num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3373
3374                mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3375                     (unsigned long long)blkno,
3376                     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3377                if (func) {
3378                        ret = func(inode, bucket, para);
3379                        if (ret && ret != -ERANGE)
3380                                mlog_errno(ret);
3381                        /* Fall through to bucket_relse() */
3382                }
3383
3384                ocfs2_xattr_bucket_relse(bucket);
3385                if (ret)
3386                        break;
3387        }
3388
3389        ocfs2_xattr_bucket_free(bucket);
3390        return ret;
3391}
3392
/*
 * State carried through the bucket iteration when listing xattr names;
 * the three fields are handed to ocfs2_xattr_list_entry() for each entry.
 */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* output buffer supplied by the caller */
	size_t buffer_size;	/* size of buffer as given by the caller */
	size_t result;		/* running result updated by ocfs2_xattr_list_entry() */
};
3398
3399static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3400                                             struct ocfs2_xattr_header *xh,
3401                                             int index,
3402                                             int *block_off,
3403                                             int *new_offset)
3404{
3405        u16 name_offset;
3406
3407        if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3408                return -EINVAL;
3409
3410        name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3411
3412        *block_off = name_offset >> sb->s_blocksize_bits;
3413        *new_offset = name_offset % sb->s_blocksize;
3414
3415        return 0;
3416}
3417
3418static int ocfs2_list_xattr_bucket(struct inode *inode,
3419                                   struct ocfs2_xattr_bucket *bucket,
3420                                   void *para)
3421{
3422        int ret = 0, type;
3423        struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3424        int i, block_off, new_offset;
3425        const char *prefix, *name;
3426
3427        for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3428                struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3429                type = ocfs2_xattr_get_type(entry);
3430                prefix = ocfs2_xattr_prefix(type);
3431
3432                if (prefix) {
3433                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3434                                                                bucket_xh(bucket),
3435                                                                i,
3436                                                                &block_off,
3437                                                                &new_offset);
3438                        if (ret)
3439                                break;
3440
3441                        name = (const char *)bucket_block(bucket, block_off) +
3442                                new_offset;
3443                        ret = ocfs2_xattr_list_entry(xl->buffer,
3444                                                     xl->buffer_size,
3445                                                     &xl->result,
3446                                                     prefix, name,
3447                                                     entry->xe_name_len);
3448                        if (ret)
3449                                break;
3450                }
3451        }
3452
3453        return ret;
3454}
3455
3456static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3457                                           struct buffer_head *blk_bh,
3458                                           xattr_tree_rec_func *rec_func,
3459                                           void *para)
3460{
3461        struct ocfs2_xattr_block *xb =
3462                        (struct ocfs2_xattr_block *)blk_bh->b_data;
3463        struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3464        int ret = 0;
3465        u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3466        u64 p_blkno = 0;
3467
3468        if (!el->l_next_free_rec || !rec_func)
3469                return 0;
3470
3471        while (name_hash > 0) {
3472                ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3473                                          &e_cpos, &num_clusters, el);
3474                if (ret) {
3475                        mlog_errno(ret);
3476                        break;
3477                }
3478
3479                ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3480                               num_clusters, para);
3481                if (ret) {
3482                        if (ret != -ERANGE)
3483                                mlog_errno(ret);
3484                        break;
3485                }
3486
3487                if (e_cpos == 0)
3488                        break;
3489
3490                name_hash = e_cpos - 1;
3491        }
3492
3493        return ret;
3494
3495}
3496
3497static int ocfs2_list_xattr_tree_rec(struct inode *inode,
3498                                     struct buffer_head *root_bh,
3499                                     u64 blkno, u32 cpos, u32 len, void *para)
3500{
3501        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
3502                                           ocfs2_list_xattr_bucket, para);
3503}
3504
3505static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3506                                             struct buffer_head *blk_bh,
3507                                             char *buffer,
3508                                             size_t buffer_size)
3509{
3510        int ret;
3511        struct ocfs2_xattr_tree_list xl = {
3512                .buffer = buffer,
3513                .buffer_size = buffer_size,
3514                .result = 0,
3515        };
3516
3517        ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
3518                                              ocfs2_list_xattr_tree_rec, &xl);
3519        if (ret) {
3520                mlog_errno(ret);
3521                goto out;
3522        }
3523
3524        ret = xl.result;
3525out:
3526        return ret;
3527}
3528
3529static int cmp_xe(const void *a, const void *b)
3530{
3531        const struct ocfs2_xattr_entry *l = a, *r = b;
3532        u32 l_hash = le32_to_cpu(l->xe_name_hash);
3533        u32 r_hash = le32_to_cpu(r->xe_name_hash);
3534
3535        if (l_hash > r_hash)
3536                return 1;
3537        if (l_hash < r_hash)
3538                return -1;
3539        return 0;
3540}
3541
3542static void swap_xe(void *a, void *b, int size)
3543{
3544        struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3545
3546        tmp = *l;
3547        memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3548        memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3549}
3550
3551/*
3552 * When the ocfs2_xattr_block is filled up, new bucket will be created
3553 * and all the xattr entries will be moved to the new bucket.
3554 * The header goes at the start of the bucket, and the names+values are
3555 * filled from the end.  This is why *target starts as the last buffer.
3556 * Note: we need to sort the entries since they are not saved in order
3557 * in the ocfs2_xattr_block.
3558 */
3559static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3560                                           struct buffer_head *xb_bh,
3561                                           struct ocfs2_xattr_bucket *bucket)
3562{
3563        int i, blocksize = inode->i_sb->s_blocksize;
3564        int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3565        u16 offset, size, off_change;
3566        struct ocfs2_xattr_entry *xe;
3567        struct ocfs2_xattr_block *xb =
3568                                (struct ocfs2_xattr_block *)xb_bh->b_data;
3569        struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3570        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3571        u16 count = le16_to_cpu(xb_xh->xh_count);
3572        char *src = xb_bh->b_data;
3573        char *target = bucket_block(bucket, blks - 1);
3574
3575        mlog(0, "cp xattr from block %llu to bucket %llu\n",
3576             (unsigned long long)xb_bh->b_blocknr,
3577             (unsigned long long)bucket_blkno(bucket));
3578
3579        for (i = 0; i < blks; i++)
3580                memset(bucket_block(bucket, i), 0, blocksize);
3581
3582        /*
3583         * Since the xe_name_offset is based on ocfs2_xattr_header,
3584         * there is a offset change corresponding to the change of
3585         * ocfs2_xattr_header's position.
3586         */
3587        off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3588        xe = &xb_xh->xh_entries[count - 1];
3589        offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3590        size = blocksize - offset;
3591
3592        /* copy all the names and values. */
3593        memcpy(target + offset, src + offset, size);
3594
3595        /* Init new header now. */
3596        xh->xh_count = xb_xh->xh_count;
3597        xh->xh_num_buckets = cpu_to_le16(1);
3598        xh->xh_name_value_len = cpu_to_le16(size);
3599        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3600
3601        /* copy all the entries. */
3602        target = bucket_block(bucket, 0);
3603        offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3604        size = count * sizeof(struct ocfs2_xattr_entry);
3605        memcpy(target + offset, (char *)xb_xh + offset, size);
3606
3607        /* Change the xe offset for all the xe because of the move. */
3608        off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3609                 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3610        for (i = 0; i < count; i++)
3611                le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3612
3613        mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3614             offset, size, off_change);
3615
3616        sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3617             cmp_xe, swap_xe);
3618}
3619
3620/*
3621 * After we move xattr from block to index btree, we have to
3622 * update ocfs2_xattr_search to the new xe and base.
3623 *
3624 * When the entry is in xattr block, xattr_bh indicates the storage place.
3625 * While if the entry is in index b-tree, "bucket" indicates the
3626 * real place of the xattr.
3627 */
3628static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3629                                            struct ocfs2_xattr_search *xs,
3630                                            struct buffer_head *old_bh)
3631{
3632        char *buf = old_bh->b_data;
3633        struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3634        struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3635        int i;
3636
3637        xs->header = bucket_xh(xs->bucket);
3638        xs->base = bucket_block(xs->bucket, 0);
3639        xs->end = xs->base + inode->i_sb->s_blocksize;
3640
3641        if (xs->not_found)
3642                return;
3643
3644        i = xs->here - old_xh->xh_entries;
3645        xs->here = &xs->header->xh_entries[i];
3646}
3647
3648static int ocfs2_xattr_create_index_block(struct inode *inode,
3649                                          struct ocfs2_xattr_search *xs,
3650                                          struct ocfs2_xattr_set_ctxt *ctxt)
3651{
3652        int ret;
3653        u32 bit_off, len;
3654        u64 blkno;
3655        handle_t *handle = ctxt->handle;
3656        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3657        struct ocfs2_inode_info *oi = OCFS2_I(inode);
3658        struct buffer_head *xb_bh = xs->xattr_bh;
3659        struct ocfs2_xattr_block *xb =
3660                        (struct ocfs2_xattr_block *)xb_bh->b_data;
3661        struct ocfs2_xattr_tree_root *xr;
3662        u16 xb_flags = le16_to_cpu(xb->xb_flags);
3663
3664        mlog(0, "create xattr index block for %llu\n",
3665             (unsigned long long)xb_bh->b_blocknr);
3666
3667        BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3668        BUG_ON(!xs->bucket);
3669
3670        /*
3671         * XXX:
3672         * We can use this lock for now, and maybe move to a dedicated mutex
3673         * if performance becomes a problem later.
3674         */
3675        down_write(&oi->ip_alloc_sem);
3676
3677        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3678                                      OCFS2_JOURNAL_ACCESS_WRITE);
3679        if (ret) {
3680                mlog_errno(ret);
3681                goto out;
3682        }
3683
3684        ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3685                                     1, 1, &bit_off, &len);
3686        if (ret) {
3687                mlog_errno(ret);
3688                goto out;
3689        }
3690
3691        /*
3692         * The bucket may spread in many blocks, and
3693         * we will only touch the 1st block and the last block
3694         * in the whole bucket(one for entry and one for data).
3695         */
3696        blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3697
3698        mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3699             (unsigned long long)blkno);
3700
3701        ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3702        if (ret) {
3703                mlog_errno(ret);
3704                goto out;
3705        }
3706
3707        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3708                                                OCFS2_JOURNAL_ACCESS_CREATE);
3709        if (ret) {
3710                mlog_errno(ret);
3711                goto out;
3712        }
3713
3714        ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3715        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3716
3717        ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3718
3719        /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3720        memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3721               offsetof(struct ocfs2_xattr_block, xb_attrs));
3722
3723        xr = &xb->xb_attrs.xb_root;
3724        xr->xt_clusters = cpu_to_le32(1);
3725        xr->xt_last_eb_blk = 0;
3726        xr->xt_list.l_tree_depth = 0;
3727        xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3728        xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3729
3730        xr->xt_list.l_recs[0].e_cpos = 0;
3731        xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3732        xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3733
3734        xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3735
3736        ocfs2_journal_dirty(handle, xb_bh);
3737
3738out:
3739        up_write(&oi->ip_alloc_sem);
3740
3741        return ret;
3742}
3743
3744static int cmp_xe_offset(const void *a, const void *b)
3745{
3746        const struct ocfs2_xattr_entry *l = a, *r = b;
3747        u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3748        u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3749
3750        if (l_name_offset < r_name_offset)
3751                return 1;
3752        if (l_name_offset > r_name_offset)
3753                return -1;
3754        return 0;
3755}
3756
3757/*
3758 * defrag a xattr bucket if we find that the bucket has some
3759 * holes beteen name/value pairs.
3760 * We will move all the name/value pairs to the end of the bucket
3761 * so that we can spare some space for insertion.
3762 */
3763static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3764                                     handle_t *handle,
3765                                     struct ocfs2_xattr_bucket *bucket)
3766{
3767        int ret, i;
3768        size_t end, offset, len, value_len;
3769        struct ocfs2_xattr_header *xh;
3770        char *entries, *buf, *bucket_buf = NULL;
3771        u64 blkno = bucket_blkno(bucket);
3772        u16 xh_free_start;
3773        size_t blocksize = inode->i_sb->s_blocksize;
3774        struct ocfs2_xattr_entry *xe;
3775
3776        /*
3777         * In order to make the operation more efficient and generic,
3778         * we copy all the blocks into a contiguous memory and do the
3779         * defragment there, so if anything is error, we will not touch
3780         * the real block.
3781         */
3782        bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3783        if (!bucket_buf) {
3784                ret = -EIO;
3785                goto out;
3786        }
3787
3788        buf = bucket_buf;
3789        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3790                memcpy(buf, bucket_block(bucket, i), blocksize);
3791
3792        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3793                                                OCFS2_JOURNAL_ACCESS_WRITE);
3794        if (ret < 0) {
3795                mlog_errno(ret);
3796                goto out;
3797        }
3798
3799        xh = (struct ocfs2_xattr_header *)bucket_buf;
3800        entries = (char *)xh->xh_entries;
3801        xh_free_start = le16_to_cpu(xh->xh_free_start);
3802
3803        mlog(0, "adjust xattr bucket in %llu, count = %u, "
3804             "xh_free_start = %u, xh_name_value_len = %u.\n",
3805             (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3806             xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3807
3808        /*
3809         * sort all the entries by their offset.
3810         * the largest will be the first, so that we can
3811         * move them to the end one by one.
3812         */
3813        sort(entries, le16_to_cpu(xh->xh_count),
3814             sizeof(struct ocfs2_xattr_entry),
3815             cmp_xe_offset, swap_xe);
3816
3817        /* Move all name/values to the end of the bucket. */
3818        xe = xh->xh_entries;
3819        end = OCFS2_XATTR_BUCKET_SIZE;
3820        for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3821                offset = le16_to_cpu(xe->xe_name_offset);
3822                if (ocfs2_xattr_is_local(xe))
3823                        value_len = OCFS2_XATTR_SIZE(
3824                                        le64_to_cpu(xe->xe_value_size));
3825                else
3826                        value_len = OCFS2_XATTR_ROOT_SIZE;
3827                len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3828
3829                /*
3830                 * We must make sure that the name/value pair
3831                 * exist in the same block. So adjust end to
3832                 * the previous block end if needed.
3833                 */
3834                if (((end - len) / blocksize !=
3835                        (end - 1) / blocksize))
3836                        end = end - end % blocksize;
3837
3838                if (end > offset + len) {
3839                        memmove(bucket_buf + end - len,
3840                                bucket_buf + offset, len);
3841                        xe->xe_name_offset = cpu_to_le16(end - len);
3842                }
3843
3844                mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3845                                "bucket %llu\n", (unsigned long long)blkno);
3846
3847                end -= len;
3848        }
3849
3850        mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3851                        "bucket %llu\n", (unsigned long long)blkno);
3852
3853        if (xh_free_start == end)
3854                goto out;
3855
3856        memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3857        xh->xh_free_start = cpu_to_le16(end);
3858
3859        /* sort the entries by their name_hash. */
3860        sort(entries, le16_to_cpu(xh->xh_count),
3861             sizeof(struct ocfs2_xattr_entry),
3862             cmp_xe, swap_xe);
3863
3864        buf = bucket_buf;
3865        for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3866                memcpy(bucket_block(bucket, i), buf, blocksize);
3867        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3868
3869out:
3870        kfree(bucket_buf);
3871        return ret;
3872}
3873
3874/*
 * prev_blkno points to the start of an existing extent.  new_blkno
 * points to a newly allocated extent.  Because we know each of our
 * clusters contains more than one bucket, we can easily split one
 * cluster at a bucket boundary.  So we take the last cluster of the
 * existing extent and split it down the middle.  We move the last half
 * of the buckets in the last cluster of the existing extent over to the
 * new extent.
3882 *
3883 * first_bh is the buffer at prev_blkno so we can update the existing
3884 * extent's bucket count.  header_bh is the bucket were we were hoping
3885 * to insert our xattr.  If the bucket move places the target in the new
3886 * extent, we'll update first_bh and header_bh after modifying the old
3887 * extent.
3888 *
3889 * first_hash will be set as the 1st xe's name_hash in the new extent.
3890 */
3891static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3892                                               handle_t *handle,
3893                                               struct ocfs2_xattr_bucket *first,
3894                                               struct ocfs2_xattr_bucket *target,
3895                                               u64 new_blkno,
3896                                               u32 num_clusters,
3897                                               u32 *first_hash)
3898{
3899        int ret;
3900        struct super_block *sb = inode->i_sb;
3901        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
3902        int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
3903        int to_move = num_buckets / 2;
3904        u64 src_blkno;
3905        u64 last_cluster_blkno = bucket_blkno(first) +
3906                ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
3907
3908        BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
3909        BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
3910
3911        mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3912             (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
3913
3914        ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
3915                                     last_cluster_blkno, new_blkno,
3916                                     to_move, first_hash);
3917        if (ret) {
3918                mlog_errno(ret);
3919                goto out;
3920        }
3921
3922        /* This is the first bucket that got moved */
3923        src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3924
3925        /*
3926         * If the target bucket was part of the moved buckets, we need to
3927         * update first and target.
3928         */
3929        if (bucket_blkno(target) >= src_blkno) {
3930                /* Find the block for the new target bucket */
3931                src_blkno = new_blkno +
3932                        (bucket_blkno(target) - src_blkno);
3933
3934                ocfs2_xattr_bucket_relse(first);
3935                ocfs2_xattr_bucket_relse(target);
3936
3937                /*
3938                 * These shouldn't fail - the buffers are in the
3939                 * journal from ocfs2_cp_xattr_bucket().
3940                 */
3941                ret = ocfs2_read_xattr_bucket(first, new_blkno);
3942                if (ret) {
3943                        mlog_errno(ret);
3944                        goto out;
3945                }
3946                ret = ocfs2_read_xattr_bucket(target, src_blkno);
3947                if (ret)
3948                        mlog_errno(ret);
3949
3950        }
3951
3952out:
3953        return ret;
3954}
3955
3956/*
3957 * Find the suitable pos when we divide a bucket into 2.
3958 * We have to make sure the xattrs with the same hash value exist
3959 * in the same bucket.
3960 *
3961 * If this ocfs2_xattr_header covers more than one hash value, find a
3962 * place where the hash value changes.  Try to find the most even split.
3963 * The most common case is that all entries have different hash values,
3964 * and the first check we make will find a place to split.
3965 */
3966static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3967{
3968        struct ocfs2_xattr_entry *entries = xh->xh_entries;
3969        int count = le16_to_cpu(xh->xh_count);
3970        int delta, middle = count / 2;
3971
3972        /*
3973         * We start at the middle.  Each step gets farther away in both
3974         * directions.  We therefore hit the change in hash value
3975         * nearest to the middle.  Note that this loop does not execute for
3976         * count < 2.
3977         */
3978        for (delta = 0; delta < middle; delta++) {
3979                /* Let's check delta earlier than middle */
3980                if (cmp_xe(&entries[middle - delta - 1],
3981                           &entries[middle - delta]))
3982                        return middle - delta;
3983
3984                /* For even counts, don't walk off the end */
3985                if ((middle + delta + 1) == count)
3986                        continue;
3987
3988                /* Now try delta past middle */
3989                if (cmp_xe(&entries[middle + delta],
3990                           &entries[middle + delta + 1]))
3991                        return middle + delta + 1;
3992        }
3993
3994        /* Every entry had the same hash */
3995        return count;
3996}
3997
3998/*
3999 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4000 * first_hash will record the 1st hash of the new bucket.
4001 *
4002 * Normally half of the xattrs will be moved.  But we have to make
4003 * sure that the xattrs with the same hash value are stored in the
4004 * same bucket. If all the xattrs in this bucket have the same hash
4005 * value, the new bucket will be initialized as an empty one and the
4006 * first_hash will be initialized as (hash_value+1).
4007 */
4008static int ocfs2_divide_xattr_bucket(struct inode *inode,
4009                                    handle_t *handle,
4010                                    u64 blk,
4011                                    u64 new_blk,
4012                                    u32 *first_hash,
4013                                    int new_bucket_head)
4014{
4015        int ret, i;
4016        int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
4017        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4018        struct ocfs2_xattr_header *xh;
4019        struct ocfs2_xattr_entry *xe;
4020        int blocksize = inode->i_sb->s_blocksize;
4021
4022        mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4023             (unsigned long long)blk, (unsigned long long)new_blk);
4024
4025        s_bucket = ocfs2_xattr_bucket_new(inode);
4026        t_bucket = ocfs2_xattr_bucket_new(inode);
4027        if (!s_bucket || !t_bucket) {
4028                ret = -ENOMEM;
4029                mlog_errno(ret);
4030                goto out;
4031        }
4032
4033        ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4034        if (ret) {
4035                mlog_errno(ret);
4036                goto out;
4037        }
4038
4039        ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4040                                                OCFS2_JOURNAL_ACCESS_WRITE);
4041        if (ret) {
4042                mlog_errno(ret);
4043                goto out;
4044        }
4045
4046        /*
4047         * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4048         * there's no need to read it.
4049         */
4050        ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4051        if (ret) {
4052                mlog_errno(ret);
4053                goto out;
4054        }
4055
4056        /*
4057         * Hey, if we're overwriting t_bucket, what difference does
4058         * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4059         * same part of ocfs2_cp_xattr_bucket().
4060         */
4061        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4062                                                new_bucket_head ?
4063                                                OCFS2_JOURNAL_ACCESS_CREATE :
4064                                                OCFS2_JOURNAL_ACCESS_WRITE);
4065        if (ret) {
4066                mlog_errno(ret);
4067                goto out;
4068        }
4069
4070        xh = bucket_xh(s_bucket);
4071        count = le16_to_cpu(xh->xh_count);
4072        start = ocfs2_xattr_find_divide_pos(xh);
4073
4074        if (start == count) {
4075                xe = &xh->xh_entries[start-1];
4076
4077                /*
4078                 * initialized a new empty bucket here.
4079                 * The hash value is set as one larger than
4080                 * that of the last entry in the previous bucket.
4081                 */
4082                for (i = 0; i < t_bucket->bu_blocks; i++)
4083                        memset(bucket_block(t_bucket, i), 0, blocksize);
4084
4085                xh = bucket_xh(t_bucket);
4086                xh->xh_free_start = cpu_to_le16(blocksize);
4087                xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4088                le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4089
4090                goto set_num_buckets;
4091        }
4092
4093        /* copy the whole bucket to the new first. */
4094        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4095
4096        /* update the new bucket. */
4097        xh = bucket_xh(t_bucket);
4098
4099        /*
4100         * Calculate the total name/value len and xh_free_start for
4101         * the old bucket first.
4102         */
4103        name_offset = OCFS2_XATTR_BUCKET_SIZE;
4104        name_value_len = 0;
4105        for (i = 0; i < start; i++) {
4106                xe = &xh->xh_entries[i];
4107                xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
4108                if (ocfs2_xattr_is_local(xe))
4109                        xe_len +=
4110                           OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4111                else
4112                        xe_len += OCFS2_XATTR_ROOT_SIZE;
4113                name_value_len += xe_len;
4114                if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4115                        name_offset = le16_to_cpu(xe->xe_name_offset);
4116        }
4117
4118        /*
4119         * Now begin the modification to the new bucket.
4120         *
4121         * In the new bucket, We just move the xattr entry to the beginning
4122         * and don't touch the name/value. So there will be some holes in the
4123         * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4124         * called.
4125         */
4126        xe = &xh->xh_entries[start];
4127        len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4128        mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4129             (int)((char *)xe - (char *)xh),
4130             (int)((char *)xh->xh_entries - (char *)xh));
4131        memmove((char *)xh->xh_entries, (char *)xe, len);
4132        xe = &xh->xh_entries[count - start];
4133        len = sizeof(struct ocfs2_xattr_entry) * start;
4134        memset((char *)xe, 0, len);
4135
4136        le16_add_cpu(&xh->xh_count, -start);
4137        le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4138
4139        /* Calculate xh_free_start for the new bucket. */
4140        xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4141        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4142                xe = &xh->xh_entries[i];
4143                xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
4144                if (ocfs2_xattr_is_local(xe))
4145                        xe_len +=
4146                           OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4147                else
4148                        xe_len += OCFS2_XATTR_ROOT_SIZE;
4149                if (le16_to_cpu(xe->xe_name_offset) <
4150                    le16_to_cpu(xh->xh_free_start))
4151                        xh->xh_free_start = xe->xe_name_offset;
4152        }
4153
4154set_num_buckets:
4155        /* set xh->xh_num_buckets for the new xh. */
4156        if (new_bucket_head)
4157                xh->xh_num_buckets = cpu_to_le16(1);
4158        else
4159                xh->xh_num_buckets = 0;
4160
4161        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4162
4163        /* store the first_hash of the new bucket. */
4164        if (first_hash)
4165                *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4166
4167        /*
4168         * Now only update the 1st block of the old bucket.  If we
4169         * just added a new empty bucket, there is no need to modify
4170         * it.
4171         */
4172        if (start == count)
4173                goto out;
4174
4175        xh = bucket_xh(s_bucket);
4176        memset(&xh->xh_entries[start], 0,
4177               sizeof(struct ocfs2_xattr_entry) * (count - start));
4178        xh->xh_count = cpu_to_le16(start);
4179        xh->xh_free_start = cpu_to_le16(name_offset);
4180        xh->xh_name_value_len = cpu_to_le16(name_value_len);
4181
4182        ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4183
4184out:
4185        ocfs2_xattr_bucket_free(s_bucket);
4186        ocfs2_xattr_bucket_free(t_bucket);
4187
4188        return ret;
4189}
4190
4191/*
4192 * Copy xattr from one bucket to another bucket.
4193 *
4194 * The caller must make sure that the journal transaction
4195 * has enough space for journaling.
4196 */
4197static int ocfs2_cp_xattr_bucket(struct inode *inode,
4198                                 handle_t *handle,
4199                                 u64 s_blkno,
4200                                 u64 t_blkno,
4201                                 int t_is_new)
4202{
4203        int ret;
4204        struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4205
4206        BUG_ON(s_blkno == t_blkno);
4207
4208        mlog(0, "cp bucket %llu to %llu, target is %d\n",
4209             (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4210             t_is_new);
4211
4212        s_bucket = ocfs2_xattr_bucket_new(inode);
4213        t_bucket = ocfs2_xattr_bucket_new(inode);
4214        if (!s_bucket || !t_bucket) {
4215                ret = -ENOMEM;
4216                mlog_errno(ret);
4217                goto out;
4218        }
4219
4220        ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4221        if (ret)
4222                goto out;
4223
4224        /*
4225         * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4226         * there's no need to read it.
4227         */
4228        ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4229        if (ret)
4230                goto out;
4231
4232        /*
4233         * Hey, if we're overwriting t_bucket, what difference does
4234         * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4235         * cluster to fill, we came here from
4236         * ocfs2_mv_xattr_buckets(), and it is really new -
4237         * ACCESS_CREATE is required.  But we also might have moved data
4238         * out of t_bucket before extending back into it.
4239         * ocfs2_add_new_xattr_bucket() can do this - its call to
4240         * ocfs2_add_new_xattr_cluster() may have created a new extent
4241         * and copied out the end of the old extent.  Then it re-extends
4242         * the old extent back to create space for new xattrs.  That's
4243         * how we get here, and the bucket isn't really new.
4244         */
4245        ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4246                                                t_is_new ?
4247                                                OCFS2_JOURNAL_ACCESS_CREATE :
4248                                                OCFS2_JOURNAL_ACCESS_WRITE);
4249        if (ret)
4250                goto out;
4251
4252        ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4253        ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4254
4255out:
4256        ocfs2_xattr_bucket_free(t_bucket);
4257        ocfs2_xattr_bucket_free(s_bucket);
4258
4259        return ret;
4260}
4261
4262/*
4263 * src_blk points to the start of an existing extent.  last_blk points to
4264 * last cluster in that extent.  to_blk points to a newly allocated
4265 * extent.  We copy the buckets from the cluster at last_blk to the new
4266 * extent.  If start_bucket is non-zero, we skip that many buckets before
4267 * we start copying.  The new extent's xh_num_buckets gets set to the
4268 * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4269 * by the same amount.
4270 */
4271static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4272                                  u64 src_blk, u64 last_blk, u64 to_blk,
4273                                  unsigned int start_bucket,
4274                                  u32 *first_hash)
4275{
4276        int i, ret, credits;
4277        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4278        int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4279        int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4280        struct ocfs2_xattr_bucket *old_first, *new_first;
4281
4282        mlog(0, "mv xattrs from cluster %llu to %llu\n",
4283             (unsigned long long)last_blk, (unsigned long long)to_blk);
4284
4285        BUG_ON(start_bucket >= num_buckets);
4286        if (start_bucket) {
4287                num_buckets -= start_bucket;
4288                last_blk += (start_bucket * blks_per_bucket);
4289        }
4290
4291        /* The first bucket of the original extent */
4292        old_first = ocfs2_xattr_bucket_new(inode);
4293        /* The first bucket of the new extent */
4294        new_first = ocfs2_xattr_bucket_new(inode);
4295        if (!old_first || !new_first) {
4296                ret = -ENOMEM;
4297                mlog_errno(ret);
4298                goto out;
4299        }
4300
4301        ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4302        if (ret) {
4303                mlog_errno(ret);
4304                goto out;
4305        }
4306
4307        /*
4308         * We need to update the first bucket of the old extent and all
4309         * the buckets going to the new extent.
4310         */
4311        credits = ((num_buckets + 1) * blks_per_bucket) +
4312                handle->h_buffer_credits;
4313        ret = ocfs2_extend_trans(handle, credits);
4314        if (ret) {
4315                mlog_errno(ret);
4316                goto out;
4317        }
4318
4319        ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4320                                                OCFS2_JOURNAL_ACCESS_WRITE);
4321        if (ret) {
4322                mlog_errno(ret);
4323                goto out;
4324        }
4325
4326        for (i = 0; i < num_buckets; i++) {
4327                ret = ocfs2_cp_xattr_bucket(inode, handle,
4328                                            last_blk + (i * blks_per_bucket),
4329                                            to_blk + (i * blks_per_bucket),
4330                                            1);
4331                if (ret) {
4332                        mlog_errno(ret);
4333                        goto out;
4334                }
4335        }
4336
4337        /*
4338         * Get the new bucket ready before we dirty anything
4339         * (This actually shouldn't fail, because we already dirtied
4340         * it once in ocfs2_cp_xattr_bucket()).
4341         */
4342        ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4343        if (ret) {
4344                mlog_errno(ret);
4345                goto out;
4346        }
4347        ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4348                                                OCFS2_JOURNAL_ACCESS_WRITE);
4349        if (ret) {
4350                mlog_errno(ret);
4351                goto out;
4352        }
4353
4354        /* Now update the headers */
4355        le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4356        ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4357
4358        bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4359        ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4360
4361        if (first_hash)
4362                *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4363
4364out:
4365        ocfs2_xattr_bucket_free(new_first);
4366        ocfs2_xattr_bucket_free(old_first);
4367        return ret;
4368}
4369
4370/*
4371 * Move some xattrs in this cluster to the new cluster.
4372 * This function should only be called when bucket size == cluster size.
4373 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4374 */
4375static int ocfs2_divide_xattr_cluster(struct inode *inode,
4376                                      handle_t *handle,
4377                                      u64 prev_blk,
4378                                      u64 new_blk,
4379                                      u32 *first_hash)
4380{
4381        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4382        int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4383
4384        BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4385
4386        ret = ocfs2_extend_trans(handle, credits);
4387        if (ret) {
4388                mlog_errno(ret);
4389                return ret;
4390        }
4391
4392        /* Move half of the xattr in start_blk to the next bucket. */
4393        return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4394                                          new_blk, first_hash, 1);
4395}
4396
4397/*
4398 * Move some xattrs from the old cluster to the new one since they are not
4399 * contiguous in ocfs2 xattr tree.
4400 *
4401 * new_blk starts a new separate cluster, and we will move some xattrs from
4402 * prev_blk to it. v_start will be set as the first name hash value in this
4403 * new cluster so that it can be used as e_cpos during tree insertion and
4404 * don't collide with our original b-tree operations. first_bh and header_bh
4405 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4406 * to extend the insert bucket.
4407 *
4408 * The problem is how much xattr should we move to the new one and when should
4409 * we update first_bh and header_bh?
4410 * 1. If cluster size > bucket size, that means the previous cluster has more
4411 *    than 1 bucket, so just move half nums of bucket into the new cluster and
4412 *    update the first_bh and header_bh if the insert bucket has been moved
4413 *    to the new cluster.
4414 * 2. If cluster_size == bucket_size:
4415 *    a) If the previous extent rec has more than one cluster and the insert
4416 *       place isn't in the last cluster, copy the entire last cluster to the
4417 *       new one. This time, we don't need to upate the first_bh and header_bh
4418 *       since they will not be moved into the new cluster.
4419 *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4420 *       the new one. And we set the extend flag to zero if the insert place is
4421 *       moved into the new allocated cluster since no extend is needed.
4422 */
4423static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4424                                            handle_t *handle,
4425                                            struct ocfs2_xattr_bucket *first,
4426                                            struct ocfs2_xattr_bucket *target,
4427                                            u64 new_blk,
4428                                            u32 prev_clusters,
4429                                            u32 *v_start,
4430                                            int *extend)
4431{
4432        int ret;
4433
4434        mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4435             (unsigned long long)bucket_blkno(first), prev_clusters,
4436             (unsigned long long)new_blk);
4437
4438        if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4439                ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4440                                                          handle,
4441                                                          first, target,
4442                                                          new_blk,
4443                                                          prev_clusters,
4444                                                          v_start);
4445                if (ret)
4446                        mlog_errno(ret);
4447        } else {
4448                /* The start of the last cluster in the first extent */
4449                u64 last_blk = bucket_blkno(first) +
4450                        ((prev_clusters - 1) *
4451                         ocfs2_clusters_to_blocks(inode->i_sb, 1));
4452
4453                if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4454                        ret = ocfs2_mv_xattr_buckets(inode, handle,
4455                                                     bucket_blkno(first),
4456                                                     last_blk, new_blk, 0,
4457                                                     v_start);
4458                        if (ret)
4459                                mlog_errno(ret);
4460                } else {
4461                        ret = ocfs2_divide_xattr_cluster(inode, handle,
4462                                                         last_blk, new_blk,
4463                                                         v_start);
4464                        if (ret)
4465                                mlog_errno(ret);
4466
4467                        if ((bucket_blkno(target) == last_blk) && extend)
4468                                *extend = 0;
4469                }
4470        }
4471
4472        return ret;
4473}
4474
4475/*
4476 * Add a new cluster for xattr storage.
4477 *
4478 * If the new cluster is contiguous with the previous one, it will be
4479 * appended to the same extent record, and num_clusters will be updated.
4480 * If not, we will insert a new extent for it and move some xattrs in
4481 * the last cluster into the new allocated one.
4482 * We also need to limit the maximum size of a btree leaf, otherwise we'll
4483 * lose the benefits of hashing because we'll have to search large leaves.
4484 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4485 * if it's bigger).
4486 *
4487 * first_bh is the first block of the previous extent rec and header_bh
4488 * indicates the bucket we will insert the new xattrs. They will be updated
4489 * when the header_bh is moved into the new cluster.
4490 */
4491static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4492                                       struct buffer_head *root_bh,
4493                                       struct ocfs2_xattr_bucket *first,
4494                                       struct ocfs2_xattr_bucket *target,
4495                                       u32 *num_clusters,
4496                                       u32 prev_cpos,
4497                                       int *extend,
4498                                       struct ocfs2_xattr_set_ctxt *ctxt)
4499{
4500        int ret;
4501        u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4502        u32 prev_clusters = *num_clusters;
4503        u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4504        u64 block;
4505        handle_t *handle = ctxt->handle;
4506        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4507        struct ocfs2_extent_tree et;
4508
4509        mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4510             "previous xattr blkno = %llu\n",
4511             (unsigned long long)OCFS2_I(inode)->ip_blkno,
4512             prev_cpos, (unsigned long long)bucket_blkno(first));
4513
4514        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4515
4516        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4517                                      OCFS2_JOURNAL_ACCESS_WRITE);
4518        if (ret < 0) {
4519                mlog_errno(ret);
4520                goto leave;
4521        }
4522
4523        ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4524                                     clusters_to_add, &bit_off, &num_bits);
4525        if (ret < 0) {
4526                if (ret != -ENOSPC)
4527                        mlog_errno(ret);
4528                goto leave;
4529        }
4530
4531        BUG_ON(num_bits > clusters_to_add);
4532
4533        block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4534        mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4535             num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4536
4537        if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
4538            (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4539             OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4540                /*
4541                 * If this cluster is contiguous with the old one and
4542                 * adding this new cluster, we don't surpass the limit of
4543                 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4544                 * initialized and used like other buckets in the previous
4545                 * cluster.
4546                 * So add it as a contiguous one. The caller will handle
4547                 * its init process.
4548                 */
4549                v_start = prev_cpos + prev_clusters;
4550                *num_clusters = prev_clusters + num_bits;
4551                mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4552                     num_bits);
4553        } else {
4554                ret = ocfs2_adjust_xattr_cross_cluster(inode,
4555                                                       handle,
4556                                                       first,
4557                                                       target,
4558                                                       block,
4559                                                       prev_clusters,
4560                                                       &v_start,
4561                                                       extend);
4562                if (ret) {
4563                        mlog_errno(ret);
4564                        goto leave;
4565                }
4566        }
4567
4568        mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4569             num_bits, (unsigned long long)block, v_start);
4570        ret = ocfs2_insert_extent(handle, &et, v_start, block,
4571                                  num_bits, 0, ctxt->meta_ac);
4572        if (ret < 0) {
4573                mlog_errno(ret);
4574                goto leave;
4575        }
4576
4577        ret = ocfs2_journal_dirty(handle, root_bh);
4578        if (ret < 0)
4579                mlog_errno(ret);
4580
4581leave:
4582        return ret;
4583}
4584
4585/*
4586 * We are given an extent.  'first' is the bucket at the very front of
4587 * the extent.  The extent has space for an additional bucket past
4588 * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
4589 * of the target bucket.  We wish to shift every bucket past the target
4590 * down one, filling in that additional space.  When we get back to the
4591 * target, we split the target between itself and the now-empty bucket
4592 * at target+1 (aka, target_blkno + blks_per_bucket).
4593 */
4594static int ocfs2_extend_xattr_bucket(struct inode *inode,
4595                                     handle_t *handle,
4596                                     struct ocfs2_xattr_bucket *first,
4597                                     u64 target_blk,
4598                                     u32 num_clusters)
4599{
4600        int ret, credits;
4601        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4602        u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4603        u64 end_blk;
4604        u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4605
4606        mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4607             "from %llu, len = %u\n", (unsigned long long)target_blk,
4608             (unsigned long long)bucket_blkno(first), num_clusters);
4609
4610        /* The extent must have room for an additional bucket */
4611        BUG_ON(new_bucket >=
4612               (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4613
4614        /* end_blk points to the last existing bucket */
4615        end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4616
4617        /*
4618         * end_blk is the start of the last existing bucket.
4619         * Thus, (end_blk - target_blk) covers the target bucket and
4620         * every bucket after it up to, but not including, the last
4621         * existing bucket.  Then we add the last existing bucket, the
4622         * new bucket, and the first bucket (3 * blk_per_bucket).
4623         */
4624        credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4625                  handle->h_buffer_credits;
4626        ret = ocfs2_extend_trans(handle, credits);
4627        if (ret) {
4628                mlog_errno(ret);
4629                goto out;
4630        }
4631
4632        ret = ocfs2_xattr_bucket_journal_access(handle, first,
4633                                                OCFS2_JOURNAL_ACCESS_WRITE);
4634        if (ret) {
4635                mlog_errno(ret);
4636                goto out;
4637        }
4638
4639        while (end_blk != target_blk) {
4640                ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4641                                            end_blk + blk_per_bucket, 0);
4642                if (ret)
4643                        goto out;
4644                end_blk -= blk_per_bucket;
4645        }
4646
4647        /* Move half of the xattr in target_blkno to the next bucket. */
4648        ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4649                                        target_blk + blk_per_bucket, NULL, 0);
4650
4651        le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4652        ocfs2_xattr_bucket_journal_dirty(handle, first);
4653
4654out:
4655        return ret;
4656}
4657
4658/*
4659 * Add new xattr bucket in an extent record and adjust the buckets
4660 * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
4661 * bucket we want to insert into.
4662 *
4663 * In the easy case, we will move all the buckets after target down by
4664 * one. Half of target's xattrs will be moved to the next bucket.
4665 *
4666 * If current cluster is full, we'll allocate a new one.  This may not
4667 * be contiguous.  The underlying calls will make sure that there is
4668 * space for the insert, shifting buckets around if necessary.
4669 * 'target' may be moved by those calls.
4670 */
4671static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4672                                      struct buffer_head *xb_bh,
4673                                      struct ocfs2_xattr_bucket *target,
4674                                      struct ocfs2_xattr_set_ctxt *ctxt)
4675{
4676        struct ocfs2_xattr_block *xb =
4677                        (struct ocfs2_xattr_block *)xb_bh->b_data;
4678        struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4679        struct ocfs2_extent_list *el = &xb_root->xt_list;
4680        u32 name_hash =
4681                le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
4682        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4683        int ret, num_buckets, extend = 1;
4684        u64 p_blkno;
4685        u32 e_cpos, num_clusters;
4686        /* The bucket at the front of the extent */
4687        struct ocfs2_xattr_bucket *first;
4688
4689        mlog(0, "Add new xattr bucket starting from %llu\n",
4690             (unsigned long long)bucket_blkno(target));
4691
4692        /* The first bucket of the original extent */
4693        first = ocfs2_xattr_bucket_new(inode);
4694        if (!first) {
4695                ret = -ENOMEM;
4696                mlog_errno(ret);
4697                goto out;
4698        }
4699
4700        ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4701                                  &num_clusters, el);
4702        if (ret) {
4703                mlog_errno(ret);
4704                goto out;
4705        }
4706
4707        ret = ocfs2_read_xattr_bucket(first, p_blkno);
4708        if (ret) {
4709                mlog_errno(ret);
4710                goto out;
4711        }
4712
4713        num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4714        if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
4715                /*
4716                 * This can move first+target if the target bucket moves
4717                 * to the new extent.
4718                 */
4719                ret = ocfs2_add_new_xattr_cluster(inode,
4720                                                  xb_bh,
4721                                                  first,
4722                                                  target,
4723                                                  &num_clusters,
4724                                                  e_cpos,
4725                                                  &extend,
4726                                                  ctxt);
4727                if (ret) {
4728                        mlog_errno(ret);
4729                        goto out;
4730                }
4731        }
4732
4733        if (extend) {
4734                ret = ocfs2_extend_xattr_bucket(inode,
4735                                                ctxt->handle,
4736                                                first,
4737                                                bucket_blkno(target),
4738                                                num_clusters);
4739                if (ret)
4740                        mlog_errno(ret);
4741        }
4742
4743out:
4744        ocfs2_xattr_bucket_free(first);
4745
4746        return ret;
4747}
4748
4749static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4750                                        struct ocfs2_xattr_bucket *bucket,
4751                                        int offs)
4752{
4753        int block_off = offs >> inode->i_sb->s_blocksize_bits;
4754
4755        offs = offs % inode->i_sb->s_blocksize;
4756        return bucket_block(bucket, block_off) + offs;
4757}
4758
4759/*
4760 * Handle the normal xattr set, including replace, delete and new.
4761 *
4762 * Note: "local" indicates the real data's locality. So we can't
4763 * just its bucket locality by its length.
4764 */
4765static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4766                                         struct ocfs2_xattr_info *xi,
4767                                         struct ocfs2_xattr_search *xs,
4768                                         u32 name_hash,
4769                                         int local)
4770{
4771        struct ocfs2_xattr_entry *last, *xe;
4772        int name_len = strlen(xi->name);
4773        struct ocfs2_xattr_header *xh = xs->header;
4774        u16 count = le16_to_cpu(xh->xh_count), start;
4775        size_t blocksize = inode->i_sb->s_blocksize;
4776        char *val;
4777        size_t offs, size, new_size;
4778
4779        last = &xh->xh_entries[count];
4780        if (!xs->not_found) {
4781                xe = xs->here;
4782                offs = le16_to_cpu(xe->xe_name_offset);
4783                if (ocfs2_xattr_is_local(xe))
4784                        size = OCFS2_XATTR_SIZE(name_len) +
4785                        OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4786                else
4787                        size = OCFS2_XATTR_SIZE(name_len) +
4788                        OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4789
4790                /*
4791                 * If the new value will be stored outside, xi->value has been
4792                 * initalized as an empty ocfs2_xattr_value_root, and the same
4793                 * goes with xi->value_len, so we can set new_size safely here.
4794                 * See ocfs2_xattr_set_in_bucket.
4795                 */
4796                new_size = OCFS2_XATTR_SIZE(name_len) +
4797                           OCFS2_XATTR_SIZE(xi->value_len);
4798
4799                le16_add_cpu(&xh->xh_name_value_len, -size);
4800                if (xi->value) {
4801                        if (new_size > size)
4802                                goto set_new_name_value;
4803
4804                        /* Now replace the old value with new one. */
4805                        if (local)
4806                                xe->xe_value_size = cpu_to_le64(xi->value_len);
4807                        else
4808                                xe->xe_value_size = 0;
4809
4810                        val = ocfs2_xattr_bucket_get_val(inode,
4811                                                         xs->bucket, offs);
4812                        memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4813                               size - OCFS2_XATTR_SIZE(name_len));
4814                        if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4815                                memcpy(val + OCFS2_XATTR_SIZE(name_len),
4816                                       xi->value, xi->value_len);
4817
4818                        le16_add_cpu(&xh->xh_name_value_len, new_size);
4819                        ocfs2_xattr_set_local(xe, local);
4820                        return;
4821                } else {
4822                        /*
4823                         * Remove the old entry if there is more than one.
4824                         * We don't remove the last entry so that we can
4825                         * use it to indicate the hash value of the empty
4826                         * bucket.
4827                         */
4828                        last -= 1;
4829                        le16_add_cpu(&xh->xh_count, -1);
4830                        if (xh->xh_count) {
4831                                memmove(xe, xe + 1,
4832                                        (void *)last - (void *)xe);
4833                                memset(last, 0,
4834                                       sizeof(struct ocfs2_xattr_entry));
4835                        } else
4836                                xh->xh_free_start =
4837                                        cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4838
4839                        return;
4840                }
4841        } else {
4842                /* find a new entry for insert. */
4843                int low = 0, high = count - 1, tmp;
4844                struct ocfs2_xattr_entry *tmp_xe;
4845
4846                while (low <= high && count) {
4847                        tmp = (low + high) / 2;
4848                        tmp_xe = &xh->xh_entries[tmp];
4849
4850                        if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4851                                low = tmp + 1;
4852                        else if (name_hash <
4853                                 le32_to_cpu(tmp_xe->xe_name_hash))
4854                                high = tmp - 1;
4855                        else {
4856                                low = tmp;
4857                                break;
4858                        }
4859                }
4860
4861                xe = &xh->xh_entries[low];
4862                if (low != count)
4863                        memmove(xe + 1, xe, (void *)last - (void *)xe);
4864
4865                le16_add_cpu(&xh->xh_count, 1);
4866                memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4867                xe->xe_name_hash = cpu_to_le32(name_hash);
4868                xe->xe_name_len = name_len;
4869                ocfs2_xattr_set_type(xe, xi->name_index);
4870        }
4871
4872set_new_name_value:
4873        /* Insert the new name+value. */
4874        size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4875
4876        /*
4877         * We must make sure that the name/value pair
4878         * exists in the same block.
4879         */
4880        offs = le16_to_cpu(xh->xh_free_start);
4881        start = offs - size;
4882
4883        if (start >> inode->i_sb->s_blocksize_bits !=
4884            (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4885                offs = offs - offs % blocksize;
4886                xh->xh_free_start = cpu_to_le16(offs);
4887        }
4888
4889        val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4890        xe->xe_name_offset = cpu_to_le16(offs - size);
4891
4892        memset(val, 0, size);
4893        memcpy(val, xi->name, name_len);
4894        memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4895
4896        xe->xe_value_size = cpu_to_le64(xi->value_len);
4897        ocfs2_xattr_set_local(xe, local);
4898        xs->here = xe;
4899        le16_add_cpu(&xh->xh_free_start, -size);
4900        le16_add_cpu(&xh->xh_name_value_len, size);
4901
4902        return;
4903}
4904
4905/*
4906 * Set the xattr entry in the specified bucket.
4907 * The bucket is indicated by xs->bucket and it should have the enough
4908 * space for the xattr insertion.
4909 */
4910static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4911                                           handle_t *handle,
4912                                           struct ocfs2_xattr_info *xi,
4913                                           struct ocfs2_xattr_search *xs,
4914                                           u32 name_hash,
4915                                           int local)
4916{
4917        int ret;
4918        u64 blkno;
4919
4920        mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4921             (unsigned long)xi->value_len, xi->name_index,
4922             (unsigned long long)bucket_blkno(xs->bucket));
4923
4924        if (!xs->bucket->bu_bhs[1]) {
4925                blkno = bucket_blkno(xs->bucket);
4926                ocfs2_xattr_bucket_relse(xs->bucket);
4927                ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4928                if (ret) {
4929                        mlog_errno(ret);
4930                        goto out;
4931                }
4932        }
4933
4934        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4935                                                OCFS2_JOURNAL_ACCESS_WRITE);
4936        if (ret < 0) {
4937                mlog_errno(ret);
4938                goto out;
4939        }
4940
4941        ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4942        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4943
4944out:
4945        return ret;
4946}
4947
4948/*
4949 * Truncate the specified xe_off entry in xattr bucket.
4950 * bucket is indicated by header_bh and len is the new length.
4951 * Both the ocfs2_xattr_value_root and the entry will be updated here.
4952 *
4953 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4954 */
4955static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4956                                             struct ocfs2_xattr_bucket *bucket,
4957                                             int xe_off,
4958                                             int len,
4959                                             struct ocfs2_xattr_set_ctxt *ctxt)
4960{
4961        int ret, offset;
4962        u64 value_blk;
4963        struct ocfs2_xattr_entry *xe;
4964        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4965        size_t blocksize = inode->i_sb->s_blocksize;
4966        struct ocfs2_xattr_value_buf vb = {
4967                .vb_access = ocfs2_journal_access,
4968        };
4969
4970        xe = &xh->xh_entries[xe_off];
4971
4972        BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4973
4974        offset = le16_to_cpu(xe->xe_name_offset) +
4975                 OCFS2_XATTR_SIZE(xe->xe_name_len);
4976
4977        value_blk = offset / blocksize;
4978
4979        /* We don't allow ocfs2_xattr_value to be stored in different block. */
4980        BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4981
4982        vb.vb_bh = bucket->bu_bhs[value_blk];
4983        BUG_ON(!vb.vb_bh);
4984
4985        vb.vb_xv = (struct ocfs2_xattr_value_root *)
4986                (vb.vb_bh->b_data + offset % blocksize);
4987
4988        /*
4989         * From here on out we have to dirty the bucket.  The generic
4990         * value calls only modify one of the bucket's bhs, but we need
4991         * to send the bucket at once.  So if they error, they *could* have
4992         * modified something.  We have to assume they did, and dirty
4993         * the whole bucket.  This leaves us in a consistent state.
4994         */
4995        mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4996             xe_off, (unsigned long long)bucket_blkno(bucket), len);
4997        ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4998        if (ret) {
4999                mlog_errno(ret);
5000                goto out;
5001        }
5002
5003        ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5004                                                OCFS2_JOURNAL_ACCESS_WRITE);
5005        if (ret) {
5006                mlog_errno(ret);
5007                goto out;
5008        }
5009
5010        xe->xe_value_size = cpu_to_le64(len);
5011
5012        ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5013
5014out:
5015        return ret;
5016}
5017
5018static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
5019                                        struct ocfs2_xattr_search *xs,
5020                                        int len,
5021                                        struct ocfs2_xattr_set_ctxt *ctxt)
5022{
5023        int ret, offset;
5024        struct ocfs2_xattr_entry *xe = xs->here;
5025        struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
5026
5027        BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
5028
5029        offset = xe - xh->xh_entries;
5030        ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
5031                                                offset, len, ctxt);
5032        if (ret)
5033                mlog_errno(ret);
5034
5035        return ret;
5036}
5037
5038static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
5039                                                handle_t *handle,
5040                                                struct ocfs2_xattr_search *xs,
5041                                                char *val,
5042                                                int value_len)
5043{
5044        int ret, offset, block_off;
5045        struct ocfs2_xattr_value_root *xv;
5046        struct ocfs2_xattr_entry *xe = xs->here;
5047        struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5048        void *base;
5049        struct ocfs2_xattr_value_buf vb = {
5050                .vb_access = ocfs2_journal_access,
5051        };
5052
5053        BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
5054
5055        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
5056                                                xe - xh->xh_entries,
5057                                                &block_off,
5058                                                &offset);
5059        if (ret) {
5060                mlog_errno(ret);
5061                goto out;
5062        }
5063
5064        base = bucket_block(xs->bucket, block_off);
5065        xv = (struct ocfs2_xattr_value_root *)(base + offset +
5066                 OCFS2_XATTR_SIZE(xe->xe_name_len));
5067
5068        vb.vb_xv = xv;
5069        vb.vb_bh = xs->bucket->bu_bhs[block_off];
5070        ret = __ocfs2_xattr_set_value_outside(inode, handle,
5071                                              &vb, val, value_len);
5072        if (ret)
5073                mlog_errno(ret);
5074out:
5075        return ret;
5076}
5077
5078static int ocfs2_rm_xattr_cluster(struct inode *inode,
5079                                  struct buffer_head *root_bh,
5080                                  u64 blkno,
5081                                  u32 cpos,
5082                                  u32 len,
5083                                  void *para)
5084{
5085        int ret;
5086        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5087        struct inode *tl_inode = osb->osb_tl_inode;
5088        handle_t *handle;
5089        struct ocfs2_xattr_block *xb =
5090                        (struct ocfs2_xattr_block *)root_bh->b_data;
5091        struct ocfs2_alloc_context *meta_ac = NULL;
5092        struct ocfs2_cached_dealloc_ctxt dealloc;
5093        struct ocfs2_extent_tree et;
5094
5095        ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5096                                          ocfs2_delete_xattr_in_bucket, para);
5097        if (ret) {
5098                mlog_errno(ret);
5099                return ret;
5100        }
5101
5102        ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5103
5104        ocfs2_init_dealloc_ctxt(&dealloc);
5105
5106        mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5107             cpos, len, (unsigned long long)blkno);
5108
5109        ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5110                                               len);
5111
5112        ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5113        if (ret) {
5114                mlog_errno(ret);
5115                return ret;
5116        }
5117
5118        mutex_lock(&tl_inode->i_mutex);
5119
5120        if (ocfs2_truncate_log_needs_flush(osb)) {
5121                ret = __ocfs2_flush_truncate_log(osb);
5122                if (ret < 0) {
5123                        mlog_errno(ret);
5124                        goto out;
5125                }
5126        }
5127
5128        handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5129        if (IS_ERR(handle)) {
5130                ret = -ENOMEM;
5131                mlog_errno(ret);
5132                goto out;
5133        }
5134
5135        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5136                                      OCFS2_JOURNAL_ACCESS_WRITE);
5137        if (ret) {
5138                mlog_errno(ret);
5139                goto out_commit;
5140        }
5141
5142        ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5143                                  &dealloc);
5144        if (ret) {
5145                mlog_errno(ret);
5146                goto out_commit;
5147        }
5148
5149        le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5150
5151        ret = ocfs2_journal_dirty(handle, root_bh);
5152        if (ret) {
5153                mlog_errno(ret);
5154                goto out_commit;
5155        }
5156
5157        ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5158        if (ret)
5159                mlog_errno(ret);
5160
5161out_commit:
5162        ocfs2_commit_trans(osb, handle);
5163out:
5164        ocfs2_schedule_truncate_log_flush(osb, 1);
5165
5166        mutex_unlock(&tl_inode->i_mutex);
5167
5168        if (meta_ac)
5169                ocfs2_free_alloc_context(meta_ac);
5170
5171        ocfs2_run_deallocs(osb, &dealloc);
5172
5173        return ret;
5174}
5175
5176static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
5177                                         handle_t *handle,
5178                                         struct ocfs2_xattr_search *xs)
5179{
5180        struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5181        struct ocfs2_xattr_entry *last = &xh->xh_entries[
5182                                                le16_to_cpu(xh->xh_count) - 1];
5183        int ret = 0;
5184
5185        ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5186                                                OCFS2_JOURNAL_ACCESS_WRITE);
5187        if (ret) {
5188                mlog_errno(ret);
5189                return;
5190        }
5191
5192        /* Remove the old entry. */
5193        memmove(xs->here, xs->here + 1,
5194                (void *)last - (void *)xs->here);
5195        memset(last, 0, sizeof(struct ocfs2_xattr_entry));
5196        le16_add_cpu(&xh->xh_count, -1);
5197
5198        ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5199}
5200
5201/*
5202 * Set the xattr name/value in the bucket specified in xs.
5203 *
5204 * As the new value in xi may be stored in the bucket or in an outside cluster,
5205 * we divide the whole process into 3 steps:
5206 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
5207 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
5208 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
5209 * 4. If the clusters for the new outside value can't be allocated, we need
5210 *    to free the xattr we allocated in set.
5211 */
5212static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5213                                     struct ocfs2_xattr_info *xi,
5214                                     struct ocfs2_xattr_search *xs,
5215                                     struct ocfs2_xattr_set_ctxt *ctxt)
5216{
5217        int ret, local = 1;
5218        size_t value_len;
5219        char *val = (char *)xi->value;
5220        struct ocfs2_xattr_entry *xe = xs->here;
5221        u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
5222                                              strlen(xi->name));
5223
5224        if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5225                /*
5226                 * We need to truncate the xattr storage first.
5227                 *
5228                 * If both the old and new value are stored to
5229                 * outside block, we only need to truncate
5230                 * the storage and then set the value outside.
5231                 *
5232                 * If the new value should be stored within block,
5233                 * we should free all the outside block first and
5234                 * the modification to the xattr block will be done
5235                 * by following steps.
5236                 */
5237                if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5238                        value_len = xi->value_len;
5239                else
5240                        value_len = 0;
5241
5242                ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5243                                                           value_len,
5244                                                           ctxt);
5245                if (ret)
5246                        goto out;
5247
5248                if (value_len)
5249                        goto set_value_outside;
5250        }
5251
5252        value_len = xi->value_len;
5253        /* So we have to handle the inside block change now. */
5254        if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5255                /*
5256                 * If the new value will be stored outside of block,
5257                 * initalize a new empty value root and insert it first.
5258                 */
5259                local = 0;
5260                xi->value = &def_xv;
5261                xi->value_len = OCFS2_XATTR_ROOT_SIZE;
5262        }
5263
5264        ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5265                                              name_hash, local);
5266        if (ret) {
5267                mlog_errno(ret);
5268                goto out;
5269        }
5270
5271        if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5272                goto out;
5273
5274        /* allocate the space now for the outside block storage. */
5275        ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5276                                                   value_len, ctxt);
5277        if (ret) {
5278                mlog_errno(ret);
5279
5280                if (xs->not_found) {
5281                        /*
5282                         * We can't allocate enough clusters for outside
5283                         * storage and we have allocated xattr already,
5284                         * so need to remove it.
5285                         */
5286                        ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5287                }
5288                goto out;
5289        }
5290
5291set_value_outside:
5292        ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5293                                                   xs, val, value_len);
5294out:
5295        return ret;
5296}
5297
5298/*
5299 * check whether the xattr bucket is filled up with the same hash value.
5300 * If we want to insert the xattr with the same hash, return -ENOSPC.
5301 * If we want to insert a xattr with different hash value, go ahead
5302 * and ocfs2_divide_xattr_bucket will handle this.
5303 */
5304static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5305                                              struct ocfs2_xattr_bucket *bucket,
5306                                              const char *name)
5307{
5308        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5309        u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5310
5311        if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5312                return 0;
5313
5314        if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5315            xh->xh_entries[0].xe_name_hash) {
5316                mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5317                     "hash = %u\n",
5318                     (unsigned long long)bucket_blkno(bucket),
5319                     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5320                return -ENOSPC;
5321        }
5322
5323        return 0;
5324}
5325
5326static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5327                                             struct ocfs2_xattr_info *xi,
5328                                             struct ocfs2_xattr_search *xs,
5329                                             struct ocfs2_xattr_set_ctxt *ctxt)
5330{
5331        struct ocfs2_xattr_header *xh;
5332        struct ocfs2_xattr_entry *xe;
5333        u16 count, header_size, xh_free_start;
5334        int free, max_free, need, old;
5335        size_t value_size = 0, name_len = strlen(xi->name);
5336        size_t blocksize = inode->i_sb->s_blocksize;
5337        int ret, allocation = 0;
5338
5339        mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5340
5341try_again:
5342        xh = xs->header;
5343        count = le16_to_cpu(xh->xh_count);
5344        xh_free_start = le16_to_cpu(xh->xh_free_start);
5345        header_size = sizeof(struct ocfs2_xattr_header) +
5346                        count * sizeof(struct ocfs2_xattr_entry);
5347        max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5348                le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5349
5350        mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5351                        "of %u which exceed block size\n",
5352                        (unsigned long long)bucket_blkno(xs->bucket),
5353                        header_size);
5354
5355        if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5356                value_size = OCFS2_XATTR_ROOT_SIZE;
5357        else if (xi->value)
5358                value_size = OCFS2_XATTR_SIZE(xi->value_len);
5359
5360        if (xs->not_found)
5361                need = sizeof(struct ocfs2_xattr_entry) +
5362                        OCFS2_XATTR_SIZE(name_len) + value_size;
5363        else {
5364                need = value_size + OCFS2_XATTR_SIZE(name_len);
5365
5366                /*
5367                 * We only replace the old value if the new length is smaller
5368                 * than the old one. Otherwise we will allocate new space in the
5369                 * bucket to store it.
5370                 */
5371                xe = xs->here;
5372                if (ocfs2_xattr_is_local(xe))
5373                        old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5374                else
5375                        old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5376
5377                if (old >= value_size)
5378                        need = 0;
5379        }
5380
5381        free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5382        /*
5383         * We need to make sure the new name/value pair
5384         * can exist in the same block.
5385         */
5386        if (xh_free_start % blocksize < need)
5387                free -= xh_free_start % blocksize;
5388
5389        mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5390             "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5391             " %u\n", xs->not_found,
5392             (unsigned long long)bucket_blkno(xs->bucket),
5393             free, need, max_free, le16_to_cpu(xh->xh_free_start),
5394             le16_to_cpu(xh->xh_name_value_len));
5395
5396        if (free < need ||
5397            (xs->not_found &&
5398             count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5399                if (need <= max_free &&
5400                    count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5401                        /*
5402                         * We can create the space by defragment. Since only the
5403                         * name/value will be moved, the xe shouldn't be changed
5404                         * in xs.
5405                         */
5406                        ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5407                                                        xs->bucket);
5408                        if (ret) {
5409                                mlog_errno(ret);
5410                                goto out;
5411                        }
5412
5413                        xh_free_start = le16_to_cpu(xh->xh_free_start);
5414                        free = xh_free_start - header_size
5415                                - OCFS2_XATTR_HEADER_GAP;
5416                        if (xh_free_start % blocksize < need)
5417                                free -= xh_free_start % blocksize;
5418
5419                        if (free >= need)
5420                                goto xattr_set;
5421
5422                        mlog(0, "Can't get enough space for xattr insert by "
5423                             "defragment. Need %u bytes, but we have %d, so "
5424                             "allocate new bucket for it.\n", need, free);
5425                }
5426
5427                /*
5428                 * We have to add new buckets or clusters and one
5429                 * allocation should leave us enough space for insert.
5430                 */
5431                BUG_ON(allocation);
5432
5433                /*
5434                 * We do not allow for overlapping ranges between buckets. And
5435                 * the maximum number of collisions we will allow for then is
5436                 * one bucket's worth, so check it here whether we need to
5437                 * add a new bucket for the insert.
5438                 */
5439                ret = ocfs2_check_xattr_bucket_collision(inode,
5440                                                         xs->bucket,
5441                                                         xi->name);
5442                if (ret) {
5443                        mlog_errno(ret);
5444                        goto out;
5445                }
5446
5447                ret = ocfs2_add_new_xattr_bucket(inode,
5448                                                 xs->xattr_bh,
5449                                                 xs->bucket,
5450                                                 ctxt);
5451                if (ret) {
5452                        mlog_errno(ret);
5453                        goto out;
5454                }
5455
5456                /*
5457                 * ocfs2_add_new_xattr_bucket() will have updated
5458                 * xs->bucket if it moved, but it will not have updated
5459                 * any of the other search fields.  Thus, we drop it and
5460                 * re-search.  Everything should be cached, so it'll be
5461                 * quick.
5462                 */
5463                ocfs2_xattr_bucket_relse(xs->bucket);
5464                ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5465                                                   xi->name_index,
5466                                                   xi->name, xs);
5467                if (ret && ret != -ENODATA)
5468                        goto out;
5469                xs->not_found = ret;
5470                allocation = 1;
5471                goto try_again;
5472        }
5473
5474xattr_set:
5475        ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5476out:
5477        mlog_exit(ret);
5478        return ret;
5479}
5480
5481static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5482                                        struct ocfs2_xattr_bucket *bucket,
5483                                        void *para)
5484{
5485        int ret = 0, ref_credits;
5486        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5487        u16 i;
5488        struct ocfs2_xattr_entry *xe;
5489        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5490        struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5491        int credits = ocfs2_remove_extent_credits(osb->sb) +
5492                ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5493        struct ocfs2_xattr_value_root *xv;
5494        struct ocfs2_rm_xattr_bucket_para *args =
5495                        (struct ocfs2_rm_xattr_bucket_para *)para;
5496
5497        ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5498
5499        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5500                xe = &xh->xh_entries[i];
5501                if (ocfs2_xattr_is_local(xe))
5502                        continue;
5503
5504                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5505                                                      i, &xv, NULL);
5506
5507                ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5508                                                         args->ref_ci,
5509                                                         args->ref_root_bh,
5510                                                         &ctxt.meta_ac,
5511                                                         &ref_credits);
5512
5513                ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5514                if (IS_ERR(ctxt.handle)) {
5515                        ret = PTR_ERR(ctxt.handle);
5516                        mlog_errno(ret);
5517                        break;
5518                }
5519
5520                ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5521                                                        i, 0, &ctxt);
5522
5523                ocfs2_commit_trans(osb, ctxt.handle);
5524                if (ctxt.meta_ac) {
5525                        ocfs2_free_alloc_context(ctxt.meta_ac);
5526                        ctxt.meta_ac = NULL;
5527                }
5528                if (ret) {
5529                        mlog_errno(ret);
5530                        break;
5531                }
5532        }
5533
5534        if (ctxt.meta_ac)
5535                ocfs2_free_alloc_context(ctxt.meta_ac);
5536        ocfs2_schedule_truncate_log_flush(osb, 1);
5537        ocfs2_run_deallocs(osb, &ctxt.dealloc);
5538        return ret;
5539}
5540
5541/*
5542 * Whenever we modify a xattr value root in the bucket(e.g, CoW
5543 * or change the extent record flag), we need to recalculate
5544 * the metaecc for the whole bucket. So it is done here.
5545 *
5546 * Note:
5547 * We have to give the extra credits for the caller.
5548 */
5549static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5550                                            handle_t *handle,
5551                                            void *para)
5552{
5553        int ret;
5554        struct ocfs2_xattr_bucket *bucket =
5555                        (struct ocfs2_xattr_bucket *)para;
5556
5557        ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5558                                                OCFS2_JOURNAL_ACCESS_WRITE);
5559        if (ret) {
5560                mlog_errno(ret);
5561                return ret;
5562        }
5563
5564        ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5565
5566        return 0;
5567}
5568
5569/*
5570 * Special action we need if the xattr value is refcounted.
5571 *
5572 * 1. If the xattr is refcounted, lock the tree.
5573 * 2. CoW the xattr if we are setting the new value and the value
5574 *    will be stored outside.
5575 * 3. In other case, decrease_refcount will work for us, so just
5576 *    lock the refcount tree, calculate the meta and credits is OK.
5577 *
5578 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5579 * currently CoW is a completed transaction, while this function
5580 * will also lock the allocators and let us deadlock. So we will
5581 * CoW the whole xattr value.
5582 */
5583static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5584                                        struct ocfs2_dinode *di,
5585                                        struct ocfs2_xattr_info *xi,
5586                                        struct ocfs2_xattr_search *xis,
5587                                        struct ocfs2_xattr_search *xbs,
5588                                        struct ocfs2_refcount_tree **ref_tree,
5589                                        int *meta_add,
5590                                        int *credits)
5591{
5592        int ret = 0;
5593        struct ocfs2_xattr_block *xb;
5594        struct ocfs2_xattr_entry *xe;
5595        char *base;
5596        u32 p_cluster, num_clusters;
5597        unsigned int ext_flags;
5598        int name_offset, name_len;
5599        struct ocfs2_xattr_value_buf vb;
5600        struct ocfs2_xattr_bucket *bucket = NULL;
5601        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5602        struct ocfs2_post_refcount refcount;
5603        struct ocfs2_post_refcount *p = NULL;
5604        struct buffer_head *ref_root_bh = NULL;
5605
5606        if (!xis->not_found) {
5607                xe = xis->here;
5608                name_offset = le16_to_cpu(xe->xe_name_offset);
5609                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5610                base = xis->base;
5611                vb.vb_bh = xis->inode_bh;
5612                vb.vb_access = ocfs2_journal_access_di;
5613        } else {
5614                int i, block_off = 0;
5615                xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5616                xe = xbs->here;
5617                name_offset = le16_to_cpu(xe->xe_name_offset);
5618                name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5619                i = xbs->here - xbs->header->xh_entries;
5620
5621                if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5622                        ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5623                                                        bucket_xh(xbs->bucket),
5624                                                        i, &block_off,
5625                                                        &name_offset);
5626                        if (ret) {
5627                                mlog_errno(ret);
5628                                goto out;
5629                        }
5630                        base = bucket_block(xbs->bucket, block_off);
5631                        vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5632                        vb.vb_access = ocfs2_journal_access;
5633
5634                        if (ocfs2_meta_ecc(osb)) {
5635                                /*create parameters for ocfs2_post_refcount. */
5636                                bucket = xbs->bucket;
5637                                refcount.credits = bucket->bu_blocks;
5638                                refcount.para = bucket;
5639                                refcount.func =
5640                                        ocfs2_xattr_bucket_post_refcount;
5641                                p = &refcount;
5642                        }
5643                } else {
5644                        base = xbs->base;
5645                        vb.vb_bh = xbs->xattr_bh;
5646                        vb.vb_access = ocfs2_journal_access_xb;
5647                }
5648        }
5649
5650        if (ocfs2_xattr_is_local(xe))
5651                goto out;
5652
5653        vb.vb_xv = (struct ocfs2_xattr_value_root *)
5654                                (base + name_offset + name_len);
5655
5656        ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5657                                       &num_clusters, &vb.vb_xv->xr_list,
5658                                       &ext_flags);
5659        if (ret) {
5660                mlog_errno(ret);
5661                goto out;
5662        }
5663
5664        /*
5665         * We just need to check the 1st extent record, since we always
5666         * CoW the whole xattr. So there shouldn't be a xattr with
5667         * some REFCOUNT extent recs after the 1st one.
5668         */
5669        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5670                goto out;
5671
5672        ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5673                                       1, ref_tree, &ref_root_bh);
5674        if (ret) {
5675                mlog_errno(ret);
5676                goto out;
5677        }
5678
5679        /*
5680         * If we are deleting the xattr or the new size will be stored inside,
5681         * cool, leave it there, the xattr truncate process will remove them
5682         * for us(it still needs the refcount tree lock and the meta, credits).
5683         * And the worse case is that every cluster truncate will split the
5684         * refcount tree, and make the original extent become 3. So we will need
5685         * 2 * cluster more extent recs at most.
5686         */
5687        if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
5688
5689                ret = ocfs2_refcounted_xattr_delete_need(inode,
5690                                                         &(*ref_tree)->rf_ci,
5691                                                         ref_root_bh, vb.vb_xv,
5692                                                         meta_add, credits);
5693                if (ret)
5694                        mlog_errno(ret);
5695                goto out;
5696        }
5697
5698        ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5699                                       *ref_tree, ref_root_bh, 0,
5700                                       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5701        if (ret)
5702                mlog_errno(ret);
5703
5704out:
5705        brelse(ref_root_bh);
5706        return ret;
5707}
5708
5709/*
5710 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5711 * The physical clusters will be added to refcount tree.
5712 */
5713static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5714                                struct ocfs2_xattr_value_root *xv,
5715                                struct ocfs2_extent_tree *value_et,
5716                                struct ocfs2_caching_info *ref_ci,
5717                                struct buffer_head *ref_root_bh,
5718                                struct ocfs2_cached_dealloc_ctxt *dealloc,
5719                                struct ocfs2_post_refcount *refcount)
5720{
5721        int ret = 0;
5722        u32 clusters = le32_to_cpu(xv->xr_clusters);
5723        u32 cpos, p_cluster, num_clusters;
5724        struct ocfs2_extent_list *el = &xv->xr_list;
5725        unsigned int ext_flags;
5726
5727        cpos = 0;
5728        while (cpos < clusters) {
5729                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5730                                               &num_clusters, el, &ext_flags);
5731
5732                cpos += num_clusters;
5733                if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5734                        continue;
5735
5736                BUG_ON(!p_cluster);
5737
5738                ret = ocfs2_add_refcount_flag(inode, value_et,
5739                                              ref_ci, ref_root_bh,
5740                                              cpos - num_clusters,
5741                                              p_cluster, num_clusters,
5742                                              dealloc, refcount);
5743                if (ret) {
5744                        mlog_errno(ret);
5745                        break;
5746                }
5747        }
5748
5749        return ret;
5750}
5751
5752/*
5753 * Given a normal ocfs2_xattr_header, refcount all the entries which
5754 * have value stored outside.
5755 * Used for xattrs stored in inode and ocfs2_xattr_block.
5756 */
5757static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5758                                struct ocfs2_xattr_value_buf *vb,
5759                                struct ocfs2_xattr_header *header,
5760                                struct ocfs2_caching_info *ref_ci,
5761                                struct buffer_head *ref_root_bh,
5762                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5763{
5764
5765        struct ocfs2_xattr_entry *xe;
5766        struct ocfs2_xattr_value_root *xv;
5767        struct ocfs2_extent_tree et;
5768        int i, ret = 0;
5769
5770        for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5771                xe = &header->xh_entries[i];
5772
5773                if (ocfs2_xattr_is_local(xe))
5774                        continue;
5775
5776                xv = (struct ocfs2_xattr_value_root *)((void *)header +
5777                        le16_to_cpu(xe->xe_name_offset) +
5778                        OCFS2_XATTR_SIZE(xe->xe_name_len));
5779
5780                vb->vb_xv = xv;
5781                ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5782
5783                ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5784                                                        ref_ci, ref_root_bh,
5785                                                        dealloc, NULL);
5786                if (ret) {
5787                        mlog_errno(ret);
5788                        break;
5789                }
5790        }
5791
5792        return ret;
5793}
5794
5795static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5796                                struct buffer_head *fe_bh,
5797                                struct ocfs2_caching_info *ref_ci,
5798                                struct buffer_head *ref_root_bh,
5799                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5800{
5801        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5802        struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5803                                (fe_bh->b_data + inode->i_sb->s_blocksize -
5804                                le16_to_cpu(di->i_xattr_inline_size));
5805        struct ocfs2_xattr_value_buf vb = {
5806                .vb_bh = fe_bh,
5807                .vb_access = ocfs2_journal_access_di,
5808        };
5809
5810        return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5811                                                  ref_ci, ref_root_bh, dealloc);
5812}
5813
/*
 * Arguments handed through the xattr tree/bucket iterators to
 * ocfs2_xattr_bucket_value_refcount(), which forwards all three to
 * ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	/* Caching info of the refcount tree. */
	struct ocfs2_caching_info *ref_ci;
	/* Buffer head of the refcount tree root. */
	struct buffer_head *ref_root_bh;
	/* Dealloc context passed on to the refcount code. */
	struct ocfs2_cached_dealloc_ctxt *dealloc;
};
5819
5820static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5821                                           struct ocfs2_xattr_bucket *bucket,
5822                                           int offset,
5823                                           struct ocfs2_xattr_value_root **xv,
5824                                           struct buffer_head **bh)
5825{
5826        int ret, block_off, name_offset;
5827        struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5828        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5829        void *base;
5830
5831        ret = ocfs2_xattr_bucket_get_name_value(sb,
5832                                                bucket_xh(bucket),
5833                                                offset,
5834                                                &block_off,
5835                                                &name_offset);
5836        if (ret) {
5837                mlog_errno(ret);
5838                goto out;
5839        }
5840
5841        base = bucket_block(bucket, block_off);
5842
5843        *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
5844                         OCFS2_XATTR_SIZE(xe->xe_name_len));
5845
5846        if (bh)
5847                *bh = bucket->bu_bhs[block_off];
5848out:
5849        return ret;
5850}
5851
5852/*
5853 * For a given xattr bucket, refcount all the entries which
5854 * have value stored outside.
5855 */
5856static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
5857                                             struct ocfs2_xattr_bucket *bucket,
5858                                             void *para)
5859{
5860        int i, ret = 0;
5861        struct ocfs2_extent_tree et;
5862        struct ocfs2_xattr_tree_value_refcount_para *ref =
5863                        (struct ocfs2_xattr_tree_value_refcount_para *)para;
5864        struct ocfs2_xattr_header *xh =
5865                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
5866        struct ocfs2_xattr_entry *xe;
5867        struct ocfs2_xattr_value_buf vb = {
5868                .vb_access = ocfs2_journal_access,
5869        };
5870        struct ocfs2_post_refcount refcount = {
5871                .credits = bucket->bu_blocks,
5872                .para = bucket,
5873                .func = ocfs2_xattr_bucket_post_refcount,
5874        };
5875        struct ocfs2_post_refcount *p = NULL;
5876
5877        /* We only need post_refcount if we support metaecc. */
5878        if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
5879                p = &refcount;
5880
5881        mlog(0, "refcount bucket %llu, count = %u\n",
5882             (unsigned long long)bucket_blkno(bucket),
5883             le16_to_cpu(xh->xh_count));
5884        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5885                xe = &xh->xh_entries[i];
5886
5887                if (ocfs2_xattr_is_local(xe))
5888                        continue;
5889
5890                ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
5891                                                      &vb.vb_xv, &vb.vb_bh);
5892                if (ret) {
5893                        mlog_errno(ret);
5894                        break;
5895                }
5896
5897                ocfs2_init_xattr_value_extent_tree(&et,
5898                                                   INODE_CACHE(inode), &vb);
5899
5900                ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
5901                                                        &et, ref->ref_ci,
5902                                                        ref->ref_root_bh,
5903                                                        ref->dealloc, p);
5904                if (ret) {
5905                        mlog_errno(ret);
5906                        break;
5907                }
5908        }
5909
5910        return ret;
5911
5912}
5913
5914static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
5915                                     struct buffer_head *root_bh,
5916                                     u64 blkno, u32 cpos, u32 len, void *para)
5917{
5918        return ocfs2_iterate_xattr_buckets(inode, blkno, len,
5919                                           ocfs2_xattr_bucket_value_refcount,
5920                                           para);
5921}
5922
5923static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
5924                                struct buffer_head *blk_bh,
5925                                struct ocfs2_caching_info *ref_ci,
5926                                struct buffer_head *ref_root_bh,
5927                                struct ocfs2_cached_dealloc_ctxt *dealloc)
5928{
5929        int ret = 0;
5930        struct ocfs2_xattr_block *xb =
5931                                (struct ocfs2_xattr_block *)blk_bh->b_data;
5932
5933        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
5934                struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
5935                struct ocfs2_xattr_value_buf vb = {
5936                        .vb_bh = blk_bh,
5937                        .vb_access = ocfs2_journal_access_xb,
5938                };
5939
5940                ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5941                                                         ref_ci, ref_root_bh,
5942                                                         dealloc);
5943        } else {
5944                struct ocfs2_xattr_tree_value_refcount_para para = {
5945                        .ref_ci = ref_ci,
5946                        .ref_root_bh = ref_root_bh,
5947                        .dealloc = dealloc,
5948                };
5949
5950                ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
5951                                                ocfs2_refcount_xattr_tree_rec,
5952                                                &para);
5953        }
5954
5955        return ret;
5956}
5957
5958int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
5959                                     struct buffer_head *fe_bh,
5960                                     struct ocfs2_caching_info *ref_ci,
5961                                     struct buffer_head *ref_root_bh,
5962                                     struct ocfs2_cached_dealloc_ctxt *dealloc)
5963{
5964        int ret = 0;
5965        struct ocfs2_inode_info *oi = OCFS2_I(inode);
5966        struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5967        struct buffer_head *blk_bh = NULL;
5968
5969        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
5970                ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
5971                                                         ref_ci, ref_root_bh,
5972                                                         dealloc);
5973                if (ret) {
5974                        mlog_errno(ret);
5975                        goto out;
5976                }
5977        }
5978
5979        if (!di->i_xattr_loc)
5980                goto out;
5981
5982        ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
5983                                     &blk_bh);
5984        if (ret < 0) {
5985                mlog_errno(ret);
5986                goto out;
5987        }
5988
5989        ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
5990                                                ref_root_bh, dealloc);
5991        if (ret)
5992                mlog_errno(ret);
5993
5994        brelse(blk_bh);
5995out:
5996
5997        return ret;
5998}
5999
/* Predicate type: takes an xattr entry and returns an int verdict. */
typedef int should_xattr_reflinked(struct ocfs2_xattr_entry *xe);

/*
 * Everything the xattr reflink code needs to carry around.
 * old_bh and new_bh are the inode buffer heads of the old and the new
 * inode respectively.
 */
struct ocfs2_xattr_reflink {
	struct inode *old_inode;
	struct inode *new_inode;
	struct buffer_head *old_bh;
	struct buffer_head *new_bh;
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	should_xattr_reflinked *xattr_reflinked;
};
6015
/*
 * Callback type: given an xattr header and the index of an entry in it,
 * return the matching value root through @xv and the buffer head that
 * holds it through @ret_bh.
 * Xattrs stored in the inode, in an xattr block and in the xattr tree each
 * have their own implementation.
 */
typedef int get_xattr_value_root(struct super_block *sb,
				 struct buffer_head *bh,
				 struct ocfs2_xattr_header *xh,
				 int offset,
				 struct ocfs2_xattr_value_root **xv,
				 struct buffer_head **ret_bh,
				 void *para);
6028
6029/*
6030 * Calculate all the xattr value root metadata stored in this xattr header and
6031 * credits we need if we create them from the scratch.
6032 * We use get_xattr_value_root so that all types of xattr container can use it.
6033 */
6034static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6035                                             struct buffer_head *bh,
6036                                             struct ocfs2_xattr_header *xh,
6037                                             int *metas, int *credits,
6038                                             int *num_recs,
6039                                             get_xattr_value_root *func,
6040                                             void *para)
6041{
6042        int i, ret = 0;
6043        struct ocfs2_xattr_value_root *xv;
6044        struct ocfs2_xattr_entry *xe;
6045
6046        for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6047                xe = &xh->xh_entries[i];
6048                if (ocfs2_xattr_is_local(xe))
6049                        continue;
6050
6051                ret = func(sb, bh, xh, i, &xv, NULL, para);
6052                if (ret) {
6053                        mlog_errno(ret);
6054                        break;
6055                }
6056
6057                *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6058                          le16_to_cpu(xv->xr_list.l_next_free_rec);
6059
6060                *credits += ocfs2_calc_extend_credits(sb,
6061                                                &def_xv.xv.xr_list,
6062                                                le32_to_cpu(xv->xr_clusters));
6063
6064                /*
6065                 * If the value is a tree with depth > 1, We don't go deep
6066                 * to the extent block, so just calculate a maximum record num.
6067                 */
6068                if (!xv->xr_list.l_tree_depth)
6069                        *num_recs += xv->xr_list.l_next_free_rec;
6070                else
6071                        *num_recs += ocfs2_clusters_for_bytes(sb,
6072                                                              XATTR_SIZE_MAX);
6073        }
6074
6075        return ret;
6076}
6077
6078/* Used by xattr inode and block to return the right xv and buffer_head. */
6079static int ocfs2_get_xattr_value_root(struct super_block *sb,
6080                                      struct buffer_head *bh,
6081                                      struct ocfs2_xattr_header *xh,
6082                                      int offset,
6083                                      struct ocfs2_xattr_value_root **xv,
6084                                      struct buffer_head **ret_bh,
6085                                      void *para)
6086{
6087        struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6088
6089        *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6090                le16_to_cpu(xe->xe_name_offset) +
6091                OCFS2_XATTR_SIZE(xe->xe_name_len));
6092
6093        if (ret_bh)
6094                *ret_bh = bh;
6095
6096        return 0;
6097}
6098
6099/*
6100 * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6101 * It is only used for inline xattr and xattr block.
6102 */
6103static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6104                                        struct ocfs2_xattr_header *xh,
6105                                        struct buffer_head *ref_root_bh,
6106                                        int *credits,
6107                                        struct ocfs2_alloc_context **meta_ac)
6108{
6109        int ret, meta_add = 0, num_recs = 0;
6110        struct ocfs2_refcount_block *rb =
6111                        (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6112
6113        *credits = 0;
6114
6115        ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6116                                                &meta_add, credits, &num_recs,
6117                                                ocfs2_get_xattr_value_root,
6118                                                NULL);
6119        if (ret) {
6120                mlog_errno(ret);
6121                goto out;
6122        }
6123
6124        /*
6125         * We need to add/modify num_recs in refcount tree, so just calculate
6126         * an approximate number we need for refcount tree change.
6127         * Sometimes we need to split the tree, and after split,  half recs
6128         * will be moved to the new block, and a new block can only provide
6129         * half number of recs. So we multiple new blocks by 2.
6130         */
6131        num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6132        meta_add += num_recs;
6133        *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6134        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6135                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6136                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6137        else
6138                *credits += 1;
6139
6140        ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6141        if (ret)
6142                mlog_errno(ret);
6143
6144out:
6145        return ret;
6146}
6147
6148/*
6149 * Given a xattr header, reflink all the xattrs in this container.
6150 * It can be used for inode, block and bucket.
6151 *
6152 * NOTE:
6153 * Before we call this function, the caller has memcpy the xattr in
6154 * old_xh to the new_xh.
6155 *
6156 * If args.xattr_reflinked is set, call it to decide whether the xe should
6157 * be reflinked or not. If not, remove it from the new xattr header.
6158 */
6159static int ocfs2_reflink_xattr_header(handle_t *handle,
6160                                      struct ocfs2_xattr_reflink *args,
6161                                      struct buffer_head *old_bh,
6162                                      struct ocfs2_xattr_header *xh,
6163                                      struct buffer_head *new_bh,
6164                                      struct ocfs2_xattr_header *new_xh,
6165                                      struct ocfs2_xattr_value_buf *vb,
6166                                      struct ocfs2_alloc_context *meta_ac,
6167                                      get_xattr_value_root *func,
6168                                      void *para)
6169{
6170        int ret = 0, i, j;
6171        struct super_block *sb = args->old_inode->i_sb;
6172        struct buffer_head *value_bh;
6173        struct ocfs2_xattr_entry *xe, *last;
6174        struct ocfs2_xattr_value_root *xv, *new_xv;
6175        struct ocfs2_extent_tree data_et;
6176        u32 clusters, cpos, p_cluster, num_clusters;
6177        unsigned int ext_flags = 0;
6178
6179        mlog(0, "reflink xattr in container %llu, count = %u\n",
6180             (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6181
6182        last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6183        for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6184                xe = &xh->xh_entries[i];
6185
6186                if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6187                        xe = &new_xh->xh_entries[j];
6188
6189                        le16_add_cpu(&new_xh->xh_count, -1);
6190                        if (new_xh->xh_count) {
6191                                memmove(xe, xe + 1,
6192                                        (void *)last - (void *)xe);
6193                                memset(last, 0,
6194                                       sizeof(struct ocfs2_xattr_entry));
6195                        }
6196
6197                        /*
6198                         * We don't want j to increase in the next round since
6199                         * it is already moved ahead.
6200                         */
6201                        j--;
6202                        continue;
6203                }
6204
6205                if (ocfs2_xattr_is_local(xe))
6206                        continue;
6207
6208                ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6209                if (ret) {
6210                        mlog_errno(ret);
6211                        break;
6212                }
6213
6214                ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6215                if (ret) {
6216                        mlog_errno(ret);
6217                        break;
6218                }
6219
6220                /*
6221                 * For the xattr which has l_tree_depth = 0, all the extent
6222                 * recs have already be copied to the new xh with the
6223                 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6224                 * increase the refount count int the refcount tree.
6225                 *
6226                 * For the xattr which has l_tree_depth > 0, we need
6227                 * to initialize it to the empty default value root,
6228                 * and then insert the extents one by one.
6229                 */
6230                if (xv->xr_list.l_tree_depth) {
6231                        memcpy(new_xv, &def_xv, sizeof(def_xv));
6232                        vb->vb_xv = new_xv;
6233                        vb->vb_bh = value_bh;
6234                        ocfs2_init_xattr_value_extent_tree(&data_et,
6235                                        INODE_CACHE(args->new_inode), vb);
6236                }
6237
6238                clusters = le32_to_cpu(xv->xr_clusters);
6239                cpos = 0;
6240                while (cpos < clusters) {
6241                        ret = ocfs2_xattr_get_clusters(args->old_inode,
6242                                                       cpos,
6243                                                       &p_cluster,
6244                                                       &num_clusters,
6245                                                       &xv->xr_list,
6246                                                       &ext_flags);
6247                        if (ret) {
6248                                mlog_errno(ret);
6249                                goto out;
6250                        }
6251
6252                        BUG_ON(!p_cluster);
6253
6254                        if (xv->xr_list.l_tree_depth) {
6255                                ret = ocfs2_insert_extent(handle,
6256                                                &data_et, cpos,
6257                                                ocfs2_clusters_to_blocks(
6258                                                        args->old_inode->i_sb,
6259                                                        p_cluster),
6260                                                num_clusters, ext_flags,
6261                                                meta_ac);
6262                                if (ret) {
6263                                        mlog_errno(ret);
6264                                        goto out;
6265                                }
6266                        }
6267
6268                        ret = ocfs2_increase_refcount(handle, args->ref_ci,
6269                                                      args->ref_root_bh,
6270                                                      p_cluster, num_clusters,
6271                                                      meta_ac, args->dealloc);
6272                        if (ret) {
6273                                mlog_errno(ret);
6274                                goto out;
6275                        }
6276
6277                        cpos += num_clusters;
6278                }
6279        }
6280
6281out:
6282        return ret;
6283}
6284
6285static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6286{
6287        int ret = 0, credits = 0;
6288        handle_t *handle;
6289        struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6290        struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6291        int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6292        int header_off = osb->sb->s_blocksize - inline_size;
6293        struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6294                                        (args->old_bh->b_data + header_off);
6295        struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6296                                        (args->new_bh->b_data + header_off);
6297        struct ocfs2_alloc_context *meta_ac = NULL;
6298        struct ocfs2_inode_info *new_oi;
6299        struct ocfs2_dinode *new_di;
6300        struct ocfs2_xattr_value_buf vb = {
6301                .vb_bh = args->new_bh,
6302                .vb_access = ocfs2_journal_access_di,
6303        };
6304
6305        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6306                                                  &credits, &meta_ac);
6307        if (ret) {
6308                mlog_errno(ret);
6309                goto out;
6310        }
6311
6312        handle = ocfs2_start_trans(osb, credits);
6313        if (IS_ERR(handle)) {
6314                ret = PTR_ERR(handle);
6315                mlog_errno(ret);
6316                goto out;
6317        }
6318
6319        ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6320                                      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6321        if (ret) {
6322                mlog_errno(ret);
6323                goto out_commit;
6324        }
6325
6326        memcpy(args->new_bh->b_data + header_off,
6327               args->old_bh->b_data + header_off, inline_size);
6328
6329        new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6330        new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6331
6332        ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6333                                         args->new_bh, new_xh, &vb, meta_ac,
6334                                         ocfs2_get_xattr_value_root, NULL);
6335        if (ret) {
6336                mlog_errno(ret);
6337                goto out_commit;
6338        }
6339
6340        new_oi = OCFS2_I(args->new_inode);
6341        spin_lock(&new_oi->ip_lock);
6342        new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6343        new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6344        spin_unlock(&new_oi->ip_lock);
6345
6346        ocfs2_journal_dirty(handle, args->new_bh);
6347
6348out_commit:
6349        ocfs2_commit_trans(osb, handle);
6350
6351out:
6352        if (meta_ac)
6353                ocfs2_free_alloc_context(meta_ac);
6354        return ret;
6355}
6356
6357static int ocfs2_create_empty_xattr_block(struct inode *inode,
6358                                          struct buffer_head *fe_bh,
6359                                          struct buffer_head **ret_bh,
6360                                          int indexed)
6361{
6362        int ret;
6363        handle_t *handle;
6364        struct ocfs2_alloc_context *meta_ac;
6365        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6366
6367        ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6368        if (ret < 0) {
6369                mlog_errno(ret);
6370                return ret;
6371        }
6372
6373        handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6374        if (IS_ERR(handle)) {
6375                ret = PTR_ERR(handle);
6376                mlog_errno(ret);
6377                goto out;
6378        }
6379
6380        mlog(0, "create new xattr block for inode %llu, index = %d\n",
6381             (unsigned long long)fe_bh->b_blocknr, indexed);
6382        ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6383                                       meta_ac, ret_bh, indexed);
6384        if (ret)
6385                mlog_errno(ret);
6386
6387        ocfs2_commit_trans(osb, handle);
6388out:
6389        ocfs2_free_alloc_context(meta_ac);
6390        return ret;
6391}
6392
6393static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6394                                     struct buffer_head *blk_bh,
6395                                     struct buffer_head *new_blk_bh)
6396{
6397        int ret = 0, credits = 0;
6398        handle_t *handle;
6399        struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6400        struct ocfs2_dinode *new_di;
6401        struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6402        int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6403        struct ocfs2_xattr_block *xb =
6404                        (struct ocfs2_xattr_block *)blk_bh->b_data;
6405        struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6406        struct ocfs2_xattr_block *new_xb =
6407                        (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6408        struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6409        struct ocfs2_alloc_context *meta_ac;
6410        struct ocfs2_xattr_value_buf vb = {
6411                .vb_bh = new_blk_bh,
6412                .vb_access = ocfs2_journal_access_xb,
6413        };
6414
6415        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6416                                                  &credits, &meta_ac);
6417        if (ret) {
6418                mlog_errno(ret);
6419                return ret;
6420        }
6421
6422        /* One more credits in case we need to add xattr flags in new inode. */
6423        handle = ocfs2_start_trans(osb, credits + 1);
6424        if (IS_ERR(handle)) {
6425                ret = PTR_ERR(handle);
6426                mlog_errno(ret);
6427                goto out;
6428        }
6429
6430        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6431                ret = ocfs2_journal_access_di(handle,
6432                                              INODE_CACHE(args->new_inode),
6433                                              args->new_bh,
6434                                              OCFS2_JOURNAL_ACCESS_WRITE);
6435                if (ret) {
6436                        mlog_errno(ret);
6437                        goto out_commit;
6438                }
6439        }
6440
6441        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6442                                      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6443        if (ret) {
6444                mlog_errno(ret);
6445                goto out_commit;
6446        }
6447
6448        memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6449               osb->sb->s_blocksize - header_off);
6450
6451        ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6452                                         new_blk_bh, new_xh, &vb, meta_ac,
6453                                         ocfs2_get_xattr_value_root, NULL);
6454        if (ret) {
6455                mlog_errno(ret);
6456                goto out_commit;
6457        }
6458
6459        ocfs2_journal_dirty(handle, new_blk_bh);
6460
6461        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6462                new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6463                spin_lock(&new_oi->ip_lock);
6464                new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6465                new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6466                spin_unlock(&new_oi->ip_lock);
6467
6468                ocfs2_journal_dirty(handle, args->new_bh);
6469        }
6470
6471out_commit:
6472        ocfs2_commit_trans(osb, handle);
6473
6474out:
6475        ocfs2_free_alloc_context(meta_ac);
6476        return ret;
6477}
6478
/*
 * State handed around while an indexed xattr tree is reflinked: the reflink
 * request itself, the old and new xattr block buffers, and one old/new
 * bucket pair used while the buckets are copied.
 */
struct ocfs2_reflink_xattr_tree_args {
	struct ocfs2_xattr_reflink *reflink;
	struct buffer_head *old_blk_bh, *new_blk_bh;
	struct ocfs2_xattr_bucket *old_bucket, *new_bucket;
};
6486
6487/*
6488 * NOTE:
6489 * We have to handle the case that both old bucket and new bucket
6490 * will call this function to get the right ret_bh.
6491 * So The caller must give us the right bh.
6492 */
6493static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6494                                        struct buffer_head *bh,
6495                                        struct ocfs2_xattr_header *xh,
6496                                        int offset,
6497                                        struct ocfs2_xattr_value_root **xv,
6498                                        struct buffer_head **ret_bh,
6499                                        void *para)
6500{
6501        struct ocfs2_reflink_xattr_tree_args *args =
6502                        (struct ocfs2_reflink_xattr_tree_args *)para;
6503        struct ocfs2_xattr_bucket *bucket;
6504
6505        if (bh == args->old_bucket->bu_bhs[0])
6506                bucket = args->old_bucket;
6507        else
6508                bucket = args->new_bucket;
6509
6510        return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6511                                               xv, ret_bh);
6512}
6513
/*
 * Totals gathered over a run of xattr buckets before they are reflinked:
 * a metadata block count, a journal credit count and a record count.
 */
struct ocfs2_value_tree_metas {
	int num_metas, credits, num_recs;
};
6519
/*
 * get_xattr_value_root callback for a single bucket: @para is the bucket
 * the value root of entry @offset is looked up in.  @bh and @xh are unused.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *cur = para;

	return ocfs2_get_xattr_tree_value_root(sb, cur, offset, xv, ret_bh);
}
6534
6535static int ocfs2_calc_value_tree_metas(struct inode *inode,
6536                                      struct ocfs2_xattr_bucket *bucket,
6537                                      void *para)
6538{
6539        struct ocfs2_value_tree_metas *metas =
6540                        (struct ocfs2_value_tree_metas *)para;
6541        struct ocfs2_xattr_header *xh =
6542                        (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6543
6544        /* Add the credits for this bucket first. */
6545        metas->credits += bucket->bu_blocks;
6546        return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6547                                        xh, &metas->num_metas,
6548                                        &metas->credits, &metas->num_recs,
6549                                        ocfs2_value_tree_metas_in_bucket,
6550                                        bucket);
6551}
6552
6553/*
6554 * Given a xattr extent rec starting from blkno and having len clusters,
6555 * iterate all the buckets calculate how much metadata we need for reflinking
6556 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6557 */
6558static int ocfs2_lock_reflink_xattr_rec_allocators(
6559                                struct ocfs2_reflink_xattr_tree_args *args,
6560                                struct ocfs2_extent_tree *xt_et,
6561                                u64 blkno, u32 len, int *credits,
6562                                struct ocfs2_alloc_context **meta_ac,
6563                                struct ocfs2_alloc_context **data_ac)
6564{
6565        int ret, num_free_extents;
6566        struct ocfs2_value_tree_metas metas;
6567        struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6568        struct ocfs2_refcount_block *rb;
6569
6570        memset(&metas, 0, sizeof(metas));
6571
6572        ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6573                                          ocfs2_calc_value_tree_metas, &metas);
6574        if (ret) {
6575                mlog_errno(ret);
6576                goto out;
6577        }
6578
6579        *credits = metas.credits;
6580
6581        /*
6582         * Calculate we need for refcount tree change.
6583         *
6584         * We need to add/modify num_recs in refcount tree, so just calculate
6585         * an approximate number we need for refcount tree change.
6586         * Sometimes we need to split the tree, and after split,  half recs
6587         * will be moved to the new block, and a new block can only provide
6588         * half number of recs. So we multiple new blocks by 2.
6589         * In the end, we have to add credits for modifying the already
6590         * existed refcount block.
6591         */
6592        rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6593        metas.num_recs =
6594                (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6595                 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6596        metas.num_metas += metas.num_recs;
6597        *credits += metas.num_recs +
6598                    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6599        if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6600                *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6601                            le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6602        else
6603                *credits += 1;
6604
6605        /* count in the xattr tree change. */
6606        num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6607        if (num_free_extents < 0) {
6608                ret = num_free_extents;
6609                mlog_errno(ret);
6610                goto out;
6611        }
6612
6613        if (num_free_extents < len)
6614                metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6615
6616        *credits += ocfs2_calc_extend_credits(osb->sb,
6617                                              xt_et->et_root_el, len);
6618
6619        if (metas.num_metas) {
6620                ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6621                                                        meta_ac);
6622                if (ret) {
6623                        mlog_errno(ret);
6624                        goto out;
6625                }
6626        }
6627
6628        if (len) {
6629                ret = ocfs2_reserve_clusters(osb, len, data_ac);
6630                if (ret)
6631                        mlog_errno(ret);
6632        }
6633out:
6634        if (ret) {
6635                if (*meta_ac) {
6636                        ocfs2_free_alloc_context(*meta_ac);
6637                        meta_ac = NULL;
6638                }
6639        }
6640
6641        return ret;
6642}
6643
6644static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6645                                u64 blkno, u64 new_blkno, u32 clusters,
6646                                struct ocfs2_alloc_context *meta_ac,
6647                                struct ocfs2_alloc_context *data_ac,
6648                                struct ocfs2_reflink_xattr_tree_args *args)
6649{
6650        int i, j, ret = 0;
6651        struct super_block *sb = args->reflink->old_inode->i_sb;
6652        u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6653        u32 num_buckets = clusters * bpc;
6654        int bpb = args->old_bucket->bu_blocks;
6655        struct ocfs2_xattr_value_buf vb = {
6656                .vb_access = ocfs2_journal_access,
6657        };
6658
6659        for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6660                ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6661                if (ret) {
6662                        mlog_errno(ret);
6663                        break;
6664                }
6665
6666                ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6667                if (ret) {
6668                        mlog_errno(ret);
6669                        break;
6670                }
6671
6672                /*
6673                 * The real bucket num in this series of blocks is stored
6674                 * in the 1st bucket.
6675                 */
6676                if (i == 0)
6677                        num_buckets = le16_to_cpu(
6678                                bucket_xh(args->old_bucket)->xh_num_buckets);
6679
6680                ret = ocfs2_xattr_bucket_journal_access(handle,
6681                                                args->new_bucket,
6682                                                OCFS2_JOURNAL_ACCESS_CREATE);
6683                if (ret) {
6684                        mlog_errno(ret);
6685                        break;
6686                }
6687
6688                for (j = 0; j < bpb; j++)
6689                        memcpy(bucket_block(args->new_bucket, j),
6690                               bucket_block(args->old_bucket, j),
6691                               sb->s_blocksize);
6692
6693                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6694
6695                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6696                                        args->old_bucket->bu_bhs[0],
6697                                        bucket_xh(args->old_bucket),
6698                                        args->new_bucket->bu_bhs[0],
6699                                        bucket_xh(args->new_bucket),
6700                                        &vb, meta_ac,
6701                                        ocfs2_get_reflink_xattr_value_root,
6702                                        args);
6703                if (ret) {
6704                        mlog_errno(ret);
6705                        break;
6706                }
6707
6708                /*
6709                 * Re-access and dirty the bucket to calculate metaecc.
6710                 * Because we may extend the transaction in reflink_xattr_header
6711                 * which will let the already accessed block gone.
6712                 */
6713                ret = ocfs2_xattr_bucket_journal_access(handle,
6714                                                args->new_bucket,
6715                                                OCFS2_JOURNAL_ACCESS_WRITE);
6716                if (ret) {
6717                        mlog_errno(ret);
6718                        break;
6719                }
6720
6721                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6722                ocfs2_xattr_bucket_relse(args->old_bucket);
6723                ocfs2_xattr_bucket_relse(args->new_bucket);
6724        }
6725
6726        ocfs2_xattr_bucket_relse(args->old_bucket);
6727        ocfs2_xattr_bucket_relse(args->new_bucket);
6728        return ret;
6729}
6730/*
6731 * Create the same xattr extent record in the new inode's xattr tree.
6732 */
6733static int ocfs2_reflink_xattr_rec(struct inode *inode,
6734                                   struct buffer_head *root_bh,
6735                                   u64 blkno,
6736                                   u32 cpos,
6737                                   u32 len,
6738                                   void *para)
6739{
6740        int ret, credits = 0;
6741        u32 p_cluster, num_clusters;
6742        u64 new_blkno;
6743        handle_t *handle;
6744        struct ocfs2_reflink_xattr_tree_args *args =
6745                        (struct ocfs2_reflink_xattr_tree_args *)para;
6746        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6747        struct ocfs2_alloc_context *meta_ac = NULL;
6748        struct ocfs2_alloc_context *data_ac = NULL;
6749        struct ocfs2_extent_tree et;
6750
6751        ocfs2_init_xattr_tree_extent_tree(&et,
6752                                          INODE_CACHE(args->reflink->new_inode),
6753                                          args->new_blk_bh);
6754
6755        ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6756                                                      len, &credits,
6757                                                      &meta_ac, &data_ac);
6758        if (ret) {
6759                mlog_errno(ret);
6760                goto out;
6761        }
6762
6763        handle = ocfs2_start_trans(osb, credits);
6764        if (IS_ERR(handle)) {
6765                ret = PTR_ERR(handle);
6766                mlog_errno(ret);
6767                goto out;
6768        }
6769
6770        ret = ocfs2_claim_clusters(osb, handle, data_ac,
6771                                   len, &p_cluster, &num_clusters);
6772        if (ret) {
6773                mlog_errno(ret);
6774                goto out_commit;
6775        }
6776
6777        new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6778
6779        mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6780             (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6781        ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6782                                          meta_ac, data_ac, args);
6783        if (ret) {
6784                mlog_errno(ret);
6785                goto out_commit;
6786        }
6787
6788        mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6789             (unsigned long long)new_blkno, len, cpos);
6790        ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6791                                  len, 0, meta_ac);
6792        if (ret)
6793                mlog_errno(ret);
6794
6795out_commit:
6796        ocfs2_commit_trans(osb, handle);
6797
6798out:
6799        if (meta_ac)
6800                ocfs2_free_alloc_context(meta_ac);
6801        if (data_ac)
6802                ocfs2_free_alloc_context(data_ac);
6803        return ret;
6804}
6805
6806/*
6807 * Create reflinked xattr buckets.
6808 * We will add bucket one by one, and refcount all the xattrs in the bucket
6809 * if they are stored outside.
6810 */
6811static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6812                                    struct buffer_head *blk_bh,
6813                                    struct buffer_head *new_blk_bh)
6814{
6815        int ret;
6816        struct ocfs2_reflink_xattr_tree_args para;
6817
6818        memset(&para, 0, sizeof(para));
6819        para.reflink = args;
6820        para.old_blk_bh = blk_bh;
6821        para.new_blk_bh = new_blk_bh;
6822
6823        para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6824        if (!para.old_bucket) {
6825                mlog_errno(-ENOMEM);
6826                return -ENOMEM;
6827        }
6828
6829        para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6830        if (!para.new_bucket) {
6831                ret = -ENOMEM;
6832                mlog_errno(ret);
6833                goto out;
6834        }
6835
6836        ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
6837                                              ocfs2_reflink_xattr_rec,
6838                                              &para);
6839        if (ret)
6840                mlog_errno(ret);
6841
6842out:
6843        ocfs2_xattr_bucket_free(para.old_bucket);
6844        ocfs2_xattr_bucket_free(para.new_bucket);
6845        return ret;
6846}
6847
6848static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
6849                                        struct buffer_head *blk_bh)
6850{
6851        int ret, indexed = 0;
6852        struct buffer_head *new_blk_bh = NULL;
6853        struct ocfs2_xattr_block *xb =
6854                        (struct ocfs2_xattr_block *)blk_bh->b_data;
6855
6856
6857        if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
6858                indexed = 1;
6859
6860        ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
6861                                             &new_blk_bh, indexed);
6862        if (ret) {
6863                mlog_errno(ret);
6864                goto out;
6865        }
6866
6867        if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
6868                ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
6869        else
6870                ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
6871        if (ret)
6872                mlog_errno(ret);
6873
6874out:
6875        brelse(new_blk_bh);
6876        return ret;
6877}
6878
6879static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
6880{
6881        int type = ocfs2_xattr_get_type(xe);
6882
6883        return type != OCFS2_XATTR_INDEX_SECURITY &&
6884               type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
6885               type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
6886}
6887
6888int ocfs2_reflink_xattrs(struct inode *old_inode,
6889                         struct buffer_head *old_bh,
6890                         struct inode *new_inode,
6891                         struct buffer_head *new_bh,
6892                         bool preserve_security)
6893{
6894        int ret;
6895        struct ocfs2_xattr_reflink args;
6896        struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
6897        struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
6898        struct buffer_head *blk_bh = NULL;
6899        struct ocfs2_cached_dealloc_ctxt dealloc;
6900        struct ocfs2_refcount_tree *ref_tree;
6901        struct buffer_head *ref_root_bh = NULL;
6902
6903        ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6904                                       le64_to_cpu(di->i_refcount_loc),
6905                                       1, &ref_tree, &ref_root_bh);
6906        if (ret) {
6907                mlog_errno(ret);
6908                goto out;
6909        }
6910
6911        ocfs2_init_dealloc_ctxt(&dealloc);
6912
6913        args.old_inode = old_inode;
6914        args.new_inode = new_inode;
6915        args.old_bh = old_bh;
6916        args.new_bh = new_bh;
6917        args.ref_ci = &ref_tree->rf_ci;
6918        args.ref_root_bh = ref_root_bh;
6919        args.dealloc = &dealloc;
6920        if (preserve_security)
6921                args.xattr_reflinked = NULL;
6922        else
6923                args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
6924
6925        if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6926                ret = ocfs2_reflink_xattr_inline(&args);
6927                if (ret) {
6928                        mlog_errno(ret);
6929                        goto out_unlock;
6930                }
6931        }
6932
6933        if (!di->i_xattr_loc)
6934                goto out_unlock;
6935
6936        ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
6937                                     &blk_bh);
6938        if (ret < 0) {
6939                mlog_errno(ret);
6940                goto out_unlock;
6941        }
6942
6943        ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
6944        if (ret)
6945                mlog_errno(ret);
6946
6947        brelse(blk_bh);
6948
6949out_unlock:
6950        ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6951                                   ref_tree, 1);
6952        brelse(ref_root_bh);
6953
6954        if (ocfs2_dealloc_has_cluster(&dealloc)) {
6955                ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
6956                ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
6957        }
6958
6959out:
6960        return ret;
6961}
6962
6963/*
6964 * Initialize security and acl for a already created inode.
6965 * Used for reflink a non-preserve-security file.
6966 *
6967 * It uses common api like ocfs2_xattr_set, so the caller
6968 * must not hold any lock expect i_mutex.
6969 */
6970int ocfs2_init_security_and_acl(struct inode *dir,
6971                                struct inode *inode)
6972{
6973        int ret = 0;
6974        struct buffer_head *dir_bh = NULL;
6975        struct ocfs2_security_xattr_info si = {
6976                .enable = 1,
6977        };
6978
6979        ret = ocfs2_init_security_get(inode, dir, &si);
6980        if (!ret) {
6981                ret = ocfs2_xattr_security_set(inode, si.name,
6982                                               si.value, si.value_len,
6983                                               XATTR_CREATE);
6984                if (ret) {
6985                        mlog_errno(ret);
6986                        goto leave;
6987                }
6988        } else if (ret != -EOPNOTSUPP) {
6989                mlog_errno(ret);
6990                goto leave;
6991        }
6992
6993        ret = ocfs2_inode_lock(dir, &dir_bh, 0);
6994        if (ret) {
6995                mlog_errno(ret);
6996                goto leave;
6997        }
6998
6999        ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7000        if (ret)
7001                mlog_errno(ret);
7002
7003        ocfs2_inode_unlock(dir, 0);
7004        brelse(dir_bh);
7005leave:
7006        return ret;
7007}
7008/*
7009 * 'security' attributes support
7010 */
/*
 * Emit "security.<name>" (NUL-terminated) into list.
 *
 * The name is taken as name_len bytes; it need not be NUL-terminated.
 * Nothing is written when list is NULL or when list_size is too small to
 * hold the prefix, the name and the terminator.
 *
 * Always returns the number of bytes the full entry needs, terminator
 * included.  inode is unused.
 */
static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
                                        size_t list_size, const char *name,
                                        size_t name_len)
{
        const size_t pfx_len = XATTR_SECURITY_PREFIX_LEN;
        const size_t needed = pfx_len + name_len + 1;
        char *cursor = list;

        if (!list || needed > list_size)
                return needed;

        memcpy(cursor, XATTR_SECURITY_PREFIX, pfx_len);
        cursor += pfx_len;
        memcpy(cursor, name, name_len);
        cursor[name_len] = '\0';

        return needed;
}
7025
7026static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
7027                                    void *buffer, size_t size)
7028{
7029        if (strcmp(name, "") == 0)
7030                return -EINVAL;
7031        return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
7032                               buffer, size);
7033}
7034
7035static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
7036                                    const void *value, size_t size, int flags)
7037{
7038        if (strcmp(name, "") == 0)
7039                return -EINVAL;
7040
7041        return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
7042                               size, flags);
7043}
7044
7045int ocfs2_init_security_get(struct inode *inode,
7046                            struct inode *dir,
7047                            struct ocfs2_security_xattr_info *si)
7048{
7049        /* check whether ocfs2 support feature xattr */
7050        if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7051                return -EOPNOTSUPP;
7052        return security_inode_init_security(inode, dir, &si->name, &si->value,
7053                                            &si->value_len);
7054}
7055
7056int ocfs2_init_security_set(handle_t *handle,
7057                            struct inode *inode,
7058                            struct buffer_head *di_bh,
7059                            struct ocfs2_security_xattr_info *si,
7060                            struct ocfs2_alloc_context *xattr_ac,
7061                            struct ocfs2_alloc_context *data_ac)
7062{
7063        return ocfs2_xattr_set_handle(handle, inode, di_bh,
7064                                     OCFS2_XATTR_INDEX_SECURITY,
7065                                     si->name, si->value, si->value_len, 0,
7066                                     xattr_ac, data_ac);
7067}
7068
7069struct xattr_handler ocfs2_xattr_security_handler = {
7070        .prefix = XATTR_SECURITY_PREFIX,
7071        .list   = ocfs2_xattr_security_list,
7072        .get    = ocfs2_xattr_security_get,
7073        .set    = ocfs2_xattr_security_set,
7074};
7075
7076/*
7077 * 'trusted' attributes support
7078 */
/*
 * Emit "trusted.<name>" (NUL-terminated) into list.
 *
 * The name is taken as name_len bytes; it need not be NUL-terminated.
 * Nothing is written when list is NULL or when list_size is too small to
 * hold the prefix, the name and the terminator.
 *
 * Always returns the number of bytes the full entry needs, terminator
 * included.  inode is unused.
 */
static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
                                       size_t list_size, const char *name,
                                       size_t name_len)
{
        const size_t pfx_len = XATTR_TRUSTED_PREFIX_LEN;
        const size_t needed = pfx_len + name_len + 1;
        char *cursor = list;

        if (!list || needed > list_size)
                return needed;

        memcpy(cursor, XATTR_TRUSTED_PREFIX, pfx_len);
        cursor += pfx_len;
        memcpy(cursor, name, name_len);
        cursor[name_len] = '\0';

        return needed;
}
7093
7094static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
7095                                   void *buffer, size_t size)
7096{
7097        if (strcmp(name, "") == 0)
7098                return -EINVAL;
7099        return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
7100                               buffer, size);
7101}
7102
7103static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
7104                                   const void *value, size_t size, int flags)
7105{
7106        if (strcmp(name, "") == 0)
7107                return -EINVAL;
7108
7109        return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
7110                               size, flags);
7111}
7112
7113struct xattr_handler ocfs2_xattr_trusted_handler = {
7114        .prefix = XATTR_TRUSTED_PREFIX,
7115        .list   = ocfs2_xattr_trusted_list,
7116        .get    = ocfs2_xattr_trusted_get,
7117        .set    = ocfs2_xattr_trusted_set,
7118};
7119
7120/*
7121 * 'user' attributes support
7122 */
7123static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
7124                                    size_t list_size, const char *name,
7125                                    size_t name_len)
7126{
7127        const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7128        const size_t total_len = prefix_len + name_len + 1;
7129        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7130
7131        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7132                return 0;
7133
7134        if (list && total_len <= list_size) {
7135                memcpy(list, XATTR_USER_PREFIX, prefix_len);
7136                memcpy(list + prefix_len, name, name_len);
7137                list[prefix_len + name_len] = '\0';
7138        }
7139        return total_len;
7140}
7141
7142static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
7143                                void *buffer, size_t size)
7144{
7145        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7146
7147        if (strcmp(name, "") == 0)
7148                return -EINVAL;
7149        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7150                return -EOPNOTSUPP;
7151        return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
7152                               buffer, size);
7153}
7154
7155static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
7156                                const void *value, size_t size, int flags)
7157{
7158        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7159
7160        if (strcmp(name, "") == 0)
7161                return -EINVAL;
7162        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7163                return -EOPNOTSUPP;
7164
7165        return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
7166                               size, flags);
7167}
7168
7169struct xattr_handler ocfs2_xattr_user_handler = {
7170        .prefix = XATTR_USER_PREFIX,
7171        .list   = ocfs2_xattr_user_list,
7172        .get    = ocfs2_xattr_user_get,
7173        .set    = ocfs2_xattr_user_set,
7174};
7175