   1/*
   2 * linux/fs/ext4/xattr.c
   3 *
   4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
   5 *
   6 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
   7 * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
   8 * Extended attributes for symlinks and special files added per
   9 *  suggestion of Luka Renko <luka.renko@hermes.si>.
  10 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
  11 *  Red Hat Inc.
  12 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
  13 *  and Andreas Gruenbacher <agruen@suse.de>.
  14 */
  15
  16/*
  17 * Extended attributes are stored directly in inodes (on file systems with
  18 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
  19 * field contains the block number if an inode uses an additional block. All
  20 * attributes must fit in the inode and one additional block. Blocks that
  21 * contain the identical set of attributes may be shared among several inodes.
  22 * Identical blocks are detected by keeping a cache of blocks that have
  23 * recently been accessed.
  24 *
   25 * Attributes stored in inodes and on blocks use different headers, but the
   26 * entries are stored in the same format:
  27 *
  28 *   +------------------+
  29 *   | header           |
  30 *   | entry 1          | |
  31 *   | entry 2          | | growing downwards
  32 *   | entry 3          | v
  33 *   | four null bytes  |
  34 *   | . . .            |
  35 *   | value 1          | ^
  36 *   | value 3          | | growing upwards
  37 *   | value 2          | |
  38 *   +------------------+
  39 *
  40 * The header is followed by multiple entry descriptors. In disk blocks, the
  41 * entry descriptors are kept sorted. In inodes, they are unsorted. The
  42 * attribute values are aligned to the end of the block in no specific order.
  43 *
  44 * Locking strategy
  45 * ----------------
  46 * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
  47 * EA blocks are only changed if they are exclusive to an inode, so
  48 * holding xattr_sem also means that nothing but the EA block's reference
  49 * count can change. Multiple writers to the same block are synchronized
  50 * by the buffer lock.
  51 */
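     /*
      * Worked example of the layout above (illustrative only; the attribute
      * names, sizes and offsets are assumptions, not taken from a real block):
      *
      *   offset 0     struct ext4_xattr_header (h_magic, h_refcount, h_blocks, ...)
      *   offset 32    entry for "user.a"       (e_name_index, e_name_len, "a", ...)
      *   offset ...   entry for "user.b"
      *   offset ...   four null bytes          (terminates the entry list)
      *   ...          free space
      *   offset ...   value of "user.b"        (EXT4_XATTR_PAD aligned)
      *   offset ...   value of "user.a"        (ends at the end of the block)
      *
      * Entries and values grow towards each other; the gap between the list
      * terminator and the lowest value offset is the free space reported by
      * ext4_xattr_free_space() below.  Attributes stored in the inode body use
      * the same scheme, except that e_value_offs is relative to the first
      * entry rather than to the start of the block.
      */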
  52
  53#include <linux/init.h>
  54#include <linux/fs.h>
  55#include <linux/slab.h>
  56#include <linux/mbcache.h>
  57#include <linux/quotaops.h>
  58#include <linux/rwsem.h>
  59#include "ext4_jbd2.h"
  60#include "ext4.h"
  61#include "xattr.h"
  62#include "acl.h"
  63
  64#ifdef EXT4_XATTR_DEBUG
  65# define ea_idebug(inode, f...) do { \
  66                printk(KERN_DEBUG "inode %s:%lu: ", \
  67                        inode->i_sb->s_id, inode->i_ino); \
  68                printk(f); \
  69                printk("\n"); \
  70        } while (0)
  71# define ea_bdebug(bh, f...) do { \
  72                char b[BDEVNAME_SIZE]; \
  73                printk(KERN_DEBUG "block %s:%lu: ", \
  74                        bdevname(bh->b_bdev, b), \
  75                        (unsigned long) bh->b_blocknr); \
  76                printk(f); \
  77                printk("\n"); \
  78        } while (0)
  79#else
  80# define ea_idebug(inode, fmt, ...)     no_printk(fmt, ##__VA_ARGS__)
  81# define ea_bdebug(bh, fmt, ...)        no_printk(fmt, ##__VA_ARGS__)
  82#endif
  83
  84static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
  85static struct buffer_head *ext4_xattr_cache_find(struct inode *,
  86                                                 struct ext4_xattr_header *,
  87                                                 struct mb_cache_entry **);
  88static void ext4_xattr_rehash(struct ext4_xattr_header *,
  89                              struct ext4_xattr_entry *);
  90static int ext4_xattr_list(struct dentry *dentry, char *buffer,
  91                           size_t buffer_size);
  92
  93static const struct xattr_handler *ext4_xattr_handler_map[] = {
  94        [EXT4_XATTR_INDEX_USER]              = &ext4_xattr_user_handler,
  95#ifdef CONFIG_EXT4_FS_POSIX_ACL
  96        [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
  97        [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
  98#endif
  99        [EXT4_XATTR_INDEX_TRUSTED]           = &ext4_xattr_trusted_handler,
 100#ifdef CONFIG_EXT4_FS_SECURITY
 101        [EXT4_XATTR_INDEX_SECURITY]          = &ext4_xattr_security_handler,
 102#endif
 103};
 104
 105const struct xattr_handler *ext4_xattr_handlers[] = {
 106        &ext4_xattr_user_handler,
 107        &ext4_xattr_trusted_handler,
 108#ifdef CONFIG_EXT4_FS_POSIX_ACL
 109        &posix_acl_access_xattr_handler,
 110        &posix_acl_default_xattr_handler,
 111#endif
 112#ifdef CONFIG_EXT4_FS_SECURITY
 113        &ext4_xattr_security_handler,
 114#endif
 115        NULL
 116};
 117
 118#define EXT4_GET_MB_CACHE(inode)        (((struct ext4_sb_info *) \
 119                                inode->i_sb->s_fs_info)->s_mb_cache)
 120
 121static __le32 ext4_xattr_block_csum(struct inode *inode,
 122                                    sector_t block_nr,
 123                                    struct ext4_xattr_header *hdr)
 124{
 125        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 126        __u32 csum;
 127        __le32 save_csum;
 128        __le64 dsk_block_nr = cpu_to_le64(block_nr);
 129
 130        save_csum = hdr->h_checksum;
 131        hdr->h_checksum = 0;
 132        csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
 133                           sizeof(dsk_block_nr));
 134        csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
 135                           EXT4_BLOCK_SIZE(inode->i_sb));
 136
 137        hdr->h_checksum = save_csum;
 138        return cpu_to_le32(csum);
 139}
 140
 141static int ext4_xattr_block_csum_verify(struct inode *inode,
 142                                        sector_t block_nr,
 143                                        struct ext4_xattr_header *hdr)
 144{
 145        if (ext4_has_metadata_csum(inode->i_sb) &&
 146            (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
 147                return 0;
 148        return 1;
 149}
 150
 151static void ext4_xattr_block_csum_set(struct inode *inode,
 152                                      sector_t block_nr,
 153                                      struct ext4_xattr_header *hdr)
 154{
 155        if (!ext4_has_metadata_csum(inode->i_sb))
 156                return;
 157
 158        hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
 159}
 160
 161static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
 162                                                struct inode *inode,
 163                                                struct buffer_head *bh)
 164{
 165        ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
 166        return ext4_handle_dirty_metadata(handle, inode, bh);
 167}
 168
 169static inline const struct xattr_handler *
 170ext4_xattr_handler(int name_index)
 171{
 172        const struct xattr_handler *handler = NULL;
 173
 174        if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
 175                handler = ext4_xattr_handler_map[name_index];
 176        return handler;
 177}
 178
 179/*
 180 * Inode operation listxattr()
 181 *
 182 * dentry->d_inode->i_mutex: don't care
 183 */
 184ssize_t
 185ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
 186{
 187        return ext4_xattr_list(dentry, buffer, size);
 188}
 189
 190static int
 191ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
 192                       void *value_start)
 193{
 194        struct ext4_xattr_entry *e = entry;
 195
 196        while (!IS_LAST_ENTRY(e)) {
 197                struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
 198                if ((void *)next >= end)
 199                        return -EIO;
 200                e = next;
 201        }
 202
 203        while (!IS_LAST_ENTRY(entry)) {
 204                if (entry->e_value_size != 0 &&
 205                    (value_start + le16_to_cpu(entry->e_value_offs) <
 206                     (void *)e + sizeof(__u32) ||
 207                     value_start + le16_to_cpu(entry->e_value_offs) +
 208                    le32_to_cpu(entry->e_value_size) > end))
 209                        return -EIO;
 210                entry = EXT4_XATTR_NEXT(entry);
 211        }
 212
 213        return 0;
 214}
 215
 216static inline int
 217ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
 218{
 219        int error;
 220
 221        if (buffer_verified(bh))
 222                return 0;
 223
 224        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 225            BHDR(bh)->h_blocks != cpu_to_le32(1))
 226                return -EIO;
 227        if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
 228                return -EIO;
 229        error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
 230                                       bh->b_data);
 231        if (!error)
 232                set_buffer_verified(bh);
 233        return error;
 234}
 235
 236static inline int
 237ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
 238{
 239        size_t value_size = le32_to_cpu(entry->e_value_size);
 240
 241        if (entry->e_value_block != 0 || value_size > size ||
 242            le16_to_cpu(entry->e_value_offs) + value_size > size)
 243                return -EIO;
 244        return 0;
 245}
 246
 247static int
 248ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
 249                      const char *name, size_t size, int sorted)
 250{
 251        struct ext4_xattr_entry *entry;
 252        size_t name_len;
 253        int cmp = 1;
 254
 255        if (name == NULL)
 256                return -EINVAL;
 257        name_len = strlen(name);
 258        entry = *pentry;
 259        for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
 260                cmp = name_index - entry->e_name_index;
 261                if (!cmp)
 262                        cmp = name_len - entry->e_name_len;
 263                if (!cmp)
 264                        cmp = memcmp(name, entry->e_name, name_len);
 265                if (cmp <= 0 && (sorted || cmp == 0))
 266                        break;
 267        }
 268        *pentry = entry;
 269        if (!cmp && ext4_xattr_check_entry(entry, size))
  270                return -EIO;
 271        return cmp ? -ENODATA : 0;
 272}
 273
 274static int
 275ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
 276                     void *buffer, size_t buffer_size)
 277{
 278        struct buffer_head *bh = NULL;
 279        struct ext4_xattr_entry *entry;
 280        size_t size;
 281        int error;
 282        struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
 283
 284        ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
 285                  name_index, name, buffer, (long)buffer_size);
 286
 287        error = -ENODATA;
 288        if (!EXT4_I(inode)->i_file_acl)
 289                goto cleanup;
 290        ea_idebug(inode, "reading block %llu",
 291                  (unsigned long long)EXT4_I(inode)->i_file_acl);
 292        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 293        if (!bh)
 294                goto cleanup;
 295        ea_bdebug(bh, "b_count=%d, refcount=%d",
 296                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 297        if (ext4_xattr_check_block(inode, bh)) {
 298bad_block:
 299                EXT4_ERROR_INODE(inode, "bad block %llu",
 300                                 EXT4_I(inode)->i_file_acl);
 301                error = -EIO;
 302                goto cleanup;
 303        }
 304        ext4_xattr_cache_insert(ext4_mb_cache, bh);
 305        entry = BFIRST(bh);
 306        error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
 307        if (error == -EIO)
 308                goto bad_block;
 309        if (error)
 310                goto cleanup;
 311        size = le32_to_cpu(entry->e_value_size);
 312        if (buffer) {
 313                error = -ERANGE;
 314                if (size > buffer_size)
 315                        goto cleanup;
 316                memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
 317                       size);
 318        }
 319        error = size;
 320
 321cleanup:
 322        brelse(bh);
 323        return error;
 324}
 325
 326int
 327ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 328                     void *buffer, size_t buffer_size)
 329{
 330        struct ext4_xattr_ibody_header *header;
 331        struct ext4_xattr_entry *entry;
 332        struct ext4_inode *raw_inode;
 333        struct ext4_iloc iloc;
 334        size_t size;
 335        void *end;
 336        int error;
 337
 338        if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
 339                return -ENODATA;
 340        error = ext4_get_inode_loc(inode, &iloc);
 341        if (error)
 342                return error;
 343        raw_inode = ext4_raw_inode(&iloc);
 344        header = IHDR(inode, raw_inode);
 345        entry = IFIRST(header);
 346        end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
 347        error = ext4_xattr_check_names(entry, end, entry);
 348        if (error)
 349                goto cleanup;
 350        error = ext4_xattr_find_entry(&entry, name_index, name,
 351                                      end - (void *)entry, 0);
 352        if (error)
 353                goto cleanup;
 354        size = le32_to_cpu(entry->e_value_size);
 355        if (buffer) {
 356                error = -ERANGE;
 357                if (size > buffer_size)
 358                        goto cleanup;
 359                memcpy(buffer, (void *)IFIRST(header) +
 360                       le16_to_cpu(entry->e_value_offs), size);
 361        }
 362        error = size;
 363
 364cleanup:
 365        brelse(iloc.bh);
 366        return error;
 367}
 368
 369/*
 370 * ext4_xattr_get()
 371 *
 372 * Copy an extended attribute into the buffer
 373 * provided, or compute the buffer size required.
  374 * Pass a NULL buffer to compute the size of the buffer required.
 375 *
 376 * Returns a negative error number on failure, or the number of bytes
 377 * used / required on success.
 378 */
 379int
 380ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 381               void *buffer, size_t buffer_size)
 382{
 383        int error;
 384
 385        if (strlen(name) > 255)
 386                return -ERANGE;
 387
 388        down_read(&EXT4_I(inode)->xattr_sem);
 389        error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
 390                                     buffer_size);
 391        if (error == -ENODATA)
 392                error = ext4_xattr_block_get(inode, name_index, name, buffer,
 393                                             buffer_size);
 394        up_read(&EXT4_I(inode)->xattr_sem);
 395        return error;
 396}
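     /*
      * Minimal usage sketch for ext4_xattr_get() (hypothetical caller; the
      * "user.foo" attribute and the error handling style are assumptions, not
      * code from the tree):
      *
      *     int len = ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, "foo", NULL, 0);
      *     char *buf;
      *
      *     if (len < 0)
      *             return len;             (-ENODATA, -EIO, ...)
      *     buf = kmalloc(len, GFP_NOFS);
      *     if (!buf)
      *             return -ENOMEM;
      *     len = ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, "foo", buf, len);
      *
      * Passing a NULL buffer only returns the value size; the second call then
      * copies the value.  The in-inode area is searched first, and the external
      * block referenced by i_file_acl is only consulted when the in-inode
      * lookup returns -ENODATA.
      */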
 397
 398static int
 399ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
 400                        char *buffer, size_t buffer_size)
 401{
 402        size_t rest = buffer_size;
 403
 404        for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
 405                const struct xattr_handler *handler =
 406                        ext4_xattr_handler(entry->e_name_index);
 407
 408                if (handler) {
 409                        size_t size = handler->list(dentry, buffer, rest,
 410                                                    entry->e_name,
 411                                                    entry->e_name_len,
 412                                                    handler->flags);
 413                        if (buffer) {
 414                                if (size > rest)
 415                                        return -ERANGE;
 416                                buffer += size;
 417                        }
 418                        rest -= size;
 419                }
 420        }
 421        return buffer_size - rest;
 422}
 423
 424static int
 425ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 426{
 427        struct inode *inode = dentry->d_inode;
 428        struct buffer_head *bh = NULL;
 429        int error;
 430        struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
 431
 432        ea_idebug(inode, "buffer=%p, buffer_size=%ld",
 433                  buffer, (long)buffer_size);
 434
 435        error = 0;
 436        if (!EXT4_I(inode)->i_file_acl)
 437                goto cleanup;
 438        ea_idebug(inode, "reading block %llu",
 439                  (unsigned long long)EXT4_I(inode)->i_file_acl);
 440        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 441        error = -EIO;
 442        if (!bh)
 443                goto cleanup;
 444        ea_bdebug(bh, "b_count=%d, refcount=%d",
 445                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 446        if (ext4_xattr_check_block(inode, bh)) {
 447                EXT4_ERROR_INODE(inode, "bad block %llu",
 448                                 EXT4_I(inode)->i_file_acl);
 449                error = -EIO;
 450                goto cleanup;
 451        }
 452        ext4_xattr_cache_insert(ext4_mb_cache, bh);
 453        error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
 454
 455cleanup:
 456        brelse(bh);
 457
 458        return error;
 459}
 460
 461static int
 462ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 463{
 464        struct inode *inode = dentry->d_inode;
 465        struct ext4_xattr_ibody_header *header;
 466        struct ext4_inode *raw_inode;
 467        struct ext4_iloc iloc;
 468        void *end;
 469        int error;
 470
 471        if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
 472                return 0;
 473        error = ext4_get_inode_loc(inode, &iloc);
 474        if (error)
 475                return error;
 476        raw_inode = ext4_raw_inode(&iloc);
 477        header = IHDR(inode, raw_inode);
 478        end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
 479        error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
 480        if (error)
 481                goto cleanup;
 482        error = ext4_xattr_list_entries(dentry, IFIRST(header),
 483                                        buffer, buffer_size);
 484
 485cleanup:
 486        brelse(iloc.bh);
 487        return error;
 488}
 489
 490/*
 491 * ext4_xattr_list()
 492 *
 493 * Copy a list of attribute names into the buffer
 494 * provided, or compute the buffer size required.
  495 * Pass a NULL buffer to compute the size of the buffer required.
 496 *
 497 * Returns a negative error number on failure, or the number of bytes
 498 * used / required on success.
 499 */
 500static int
 501ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 502{
 503        int ret, ret2;
 504
 505        down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
 506        ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
 507        if (ret < 0)
 508                goto errout;
 509        if (buffer) {
 510                buffer += ret;
 511                buffer_size -= ret;
 512        }
 513        ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
 514        if (ret < 0)
 515                goto errout;
 516        ret += ret2;
 517errout:
 518        up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
 519        return ret;
 520}
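     /*
      * The returned list is the concatenation of the names stored in the inode
      * body followed by the names stored in the external block; an attribute
      * lives in exactly one of the two places, so no name is reported twice.
      * A minimal sketch of the usual two-pass convention (hypothetical caller;
      * normally this is reached via the VFS listxattr(2) path):
      *
      *     ssize_t len = ext4_listxattr(dentry, NULL, 0);  (size query)
      *     ...allocate len bytes...
      *     len = ext4_listxattr(dentry, buf, len);         (fill the buffer)
      */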
 521
 522/*
 523 * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
 524 * not set, set it.
 525 */
 526static void ext4_xattr_update_super_block(handle_t *handle,
 527                                          struct super_block *sb)
 528{
 529        if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
 530                return;
 531
 532        BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
 533        if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
 534                EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
 535                ext4_handle_dirty_super(handle, sb);
 536        }
 537}
 538
 539/*
 540 * Release the xattr block BH: If the reference count is > 1, decrement it;
 541 * otherwise free the block.
 542 */
 543static void
 544ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 545                         struct buffer_head *bh)
 546{
 547        struct mb_cache_entry *ce = NULL;
 548        int error = 0;
 549        struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
 550
 551        ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
 552        BUFFER_TRACE(bh, "get_write_access");
 553        error = ext4_journal_get_write_access(handle, bh);
 554        if (error)
 555                goto out;
 556
 557        lock_buffer(bh);
 558        if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
 559                ea_bdebug(bh, "refcount now=0; freeing");
 560                if (ce)
 561                        mb_cache_entry_free(ce);
 562                get_bh(bh);
 563                unlock_buffer(bh);
 564                ext4_free_blocks(handle, inode, bh, 0, 1,
 565                                 EXT4_FREE_BLOCKS_METADATA |
 566                                 EXT4_FREE_BLOCKS_FORGET);
 567        } else {
 568                le32_add_cpu(&BHDR(bh)->h_refcount, -1);
 569                if (ce)
 570                        mb_cache_entry_release(ce);
 571                /*
 572                 * Beware of this ugliness: Releasing of xattr block references
 573                 * from different inodes can race and so we have to protect
 574                 * from a race where someone else frees the block (and releases
 575                 * its journal_head) before we are done dirtying the buffer. In
 576                 * nojournal mode this race is harmless and we actually cannot
 577                 * call ext4_handle_dirty_xattr_block() with locked buffer as
 578                 * that function can call sync_dirty_buffer() so for that case
 579                 * we handle the dirtying after unlocking the buffer.
 580                 */
 581                if (ext4_handle_valid(handle))
 582                        error = ext4_handle_dirty_xattr_block(handle, inode,
 583                                                              bh);
 584                unlock_buffer(bh);
 585                if (!ext4_handle_valid(handle))
 586                        error = ext4_handle_dirty_xattr_block(handle, inode,
 587                                                              bh);
 588                if (IS_SYNC(inode))
 589                        ext4_handle_sync(handle);
 590                dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
 591                ea_bdebug(bh, "refcount now=%d; releasing",
 592                          le32_to_cpu(BHDR(bh)->h_refcount));
 593        }
 594out:
 595        ext4_std_error(inode->i_sb, error);
 596        return;
 597}
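     /*
      * Refcount example with illustrative numbers: if three inodes share one
      * xattr block, BHDR(bh)->h_refcount is 3.  Dropping the block from one
      * inode decrements it to 2 and releases that inode's quota for the block;
      * only the final release, when h_refcount is 1, frees the block itself
      * via ext4_free_blocks().
      */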
 598
 599/*
 600 * Find the available free space for EAs. This also returns the total number of
 601 * bytes used by EA entries.
 602 */
 603static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
 604                                    size_t *min_offs, void *base, int *total)
 605{
 606        for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
 607                if (!last->e_value_block && last->e_value_size) {
 608                        size_t offs = le16_to_cpu(last->e_value_offs);
 609                        if (offs < *min_offs)
 610                                *min_offs = offs;
 611                }
 612                if (total)
 613                        *total += EXT4_XATTR_LEN(last->e_name_len);
 614        }
 615        return (*min_offs - ((void *)last - base) - sizeof(__u32));
 616}
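     /*
      * Worked example with illustrative numbers, assuming a 4096-byte block:
      * if the header and two entry descriptors occupy bytes 0..95, "last" ends
      * up pointing at the 4-byte list terminator at offset 96, and the lowest
      * value starts at offset 3968, so the function returns
      * 3968 - 96 - sizeof(__u32) = 3868 bytes of free space.  The sizeof(__u32)
      * term keeps room for the null entry that must always terminate the list.
      */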
 617
 618static int
 619ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
 620{
 621        struct ext4_xattr_entry *last;
 622        size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
 623
 624        /* Compute min_offs and last. */
 625        last = s->first;
 626        for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
 627                if (!last->e_value_block && last->e_value_size) {
 628                        size_t offs = le16_to_cpu(last->e_value_offs);
 629                        if (offs < min_offs)
 630                                min_offs = offs;
 631                }
 632        }
 633        free = min_offs - ((void *)last - s->base) - sizeof(__u32);
 634        if (!s->not_found) {
 635                if (!s->here->e_value_block && s->here->e_value_size) {
 636                        size_t size = le32_to_cpu(s->here->e_value_size);
 637                        free += EXT4_XATTR_SIZE(size);
 638                }
 639                free += EXT4_XATTR_LEN(name_len);
 640        }
 641        if (i->value) {
 642                if (free < EXT4_XATTR_SIZE(i->value_len) ||
 643                    free < EXT4_XATTR_LEN(name_len) +
 644                           EXT4_XATTR_SIZE(i->value_len))
 645                        return -ENOSPC;
 646        }
 647
 648        if (i->value && s->not_found) {
 649                /* Insert the new name. */
 650                size_t size = EXT4_XATTR_LEN(name_len);
 651                size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
 652                memmove((void *)s->here + size, s->here, rest);
 653                memset(s->here, 0, size);
 654                s->here->e_name_index = i->name_index;
 655                s->here->e_name_len = name_len;
 656                memcpy(s->here->e_name, i->name, name_len);
 657        } else {
 658                if (!s->here->e_value_block && s->here->e_value_size) {
 659                        void *first_val = s->base + min_offs;
 660                        size_t offs = le16_to_cpu(s->here->e_value_offs);
 661                        void *val = s->base + offs;
 662                        size_t size = EXT4_XATTR_SIZE(
 663                                le32_to_cpu(s->here->e_value_size));
 664
 665                        if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
 666                                /* The old and the new value have the same
 667                                   size. Just replace. */
 668                                s->here->e_value_size =
 669                                        cpu_to_le32(i->value_len);
 670                                if (i->value == EXT4_ZERO_XATTR_VALUE) {
 671                                        memset(val, 0, size);
 672                                } else {
 673                                        /* Clear pad bytes first. */
 674                                        memset(val + size - EXT4_XATTR_PAD, 0,
 675                                               EXT4_XATTR_PAD);
 676                                        memcpy(val, i->value, i->value_len);
 677                                }
 678                                return 0;
 679                        }
 680
 681                        /* Remove the old value. */
 682                        memmove(first_val + size, first_val, val - first_val);
 683                        memset(first_val, 0, size);
 684                        s->here->e_value_size = 0;
 685                        s->here->e_value_offs = 0;
 686                        min_offs += size;
 687
 688                        /* Adjust all value offsets. */
 689                        last = s->first;
 690                        while (!IS_LAST_ENTRY(last)) {
 691                                size_t o = le16_to_cpu(last->e_value_offs);
 692                                if (!last->e_value_block &&
 693                                    last->e_value_size && o < offs)
 694                                        last->e_value_offs =
 695                                                cpu_to_le16(o + size);
 696                                last = EXT4_XATTR_NEXT(last);
 697                        }
 698                }
 699                if (!i->value) {
 700                        /* Remove the old name. */
 701                        size_t size = EXT4_XATTR_LEN(name_len);
 702                        last = ENTRY((void *)last - size);
 703                        memmove(s->here, (void *)s->here + size,
 704                                (void *)last - (void *)s->here + sizeof(__u32));
 705                        memset(last, 0, size);
 706                }
 707        }
 708
 709        if (i->value) {
 710                /* Insert the new value. */
 711                s->here->e_value_size = cpu_to_le32(i->value_len);
 712                if (i->value_len) {
 713                        size_t size = EXT4_XATTR_SIZE(i->value_len);
 714                        void *val = s->base + min_offs - size;
 715                        s->here->e_value_offs = cpu_to_le16(min_offs - size);
 716                        if (i->value == EXT4_ZERO_XATTR_VALUE) {
 717                                memset(val, 0, size);
 718                        } else {
 719                                /* Clear the pad bytes first. */
 720                                memset(val + size - EXT4_XATTR_PAD, 0,
 721                                       EXT4_XATTR_PAD);
 722                                memcpy(val, i->value, i->value_len);
 723                        }
 724                }
 725        }
 726        return 0;
 727}
 728
 729struct ext4_xattr_block_find {
 730        struct ext4_xattr_search s;
 731        struct buffer_head *bh;
 732};
 733
 734static int
 735ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
 736                      struct ext4_xattr_block_find *bs)
 737{
 738        struct super_block *sb = inode->i_sb;
 739        int error;
 740
 741        ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
 742                  i->name_index, i->name, i->value, (long)i->value_len);
 743
 744        if (EXT4_I(inode)->i_file_acl) {
 745                /* The inode already has an extended attribute block. */
 746                bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
 747                error = -EIO;
 748                if (!bs->bh)
 749                        goto cleanup;
 750                ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
 751                        atomic_read(&(bs->bh->b_count)),
 752                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
 753                if (ext4_xattr_check_block(inode, bs->bh)) {
 754                        EXT4_ERROR_INODE(inode, "bad block %llu",
 755                                         EXT4_I(inode)->i_file_acl);
 756                        error = -EIO;
 757                        goto cleanup;
 758                }
 759                /* Find the named attribute. */
 760                bs->s.base = BHDR(bs->bh);
 761                bs->s.first = BFIRST(bs->bh);
 762                bs->s.end = bs->bh->b_data + bs->bh->b_size;
 763                bs->s.here = bs->s.first;
 764                error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
 765                                              i->name, bs->bh->b_size, 1);
 766                if (error && error != -ENODATA)
 767                        goto cleanup;
 768                bs->s.not_found = error;
 769        }
 770        error = 0;
 771
 772cleanup:
 773        return error;
 774}
 775
 776static int
 777ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 778                     struct ext4_xattr_info *i,
 779                     struct ext4_xattr_block_find *bs)
 780{
 781        struct super_block *sb = inode->i_sb;
 782        struct buffer_head *new_bh = NULL;
 783        struct ext4_xattr_search *s = &bs->s;
 784        struct mb_cache_entry *ce = NULL;
 785        int error = 0;
 786        struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
 787
 788#define header(x) ((struct ext4_xattr_header *)(x))
 789
 790        if (i->value && i->value_len > sb->s_blocksize)
 791                return -ENOSPC;
 792        if (s->base) {
 793                ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
 794                                        bs->bh->b_blocknr);
 795                BUFFER_TRACE(bs->bh, "get_write_access");
 796                error = ext4_journal_get_write_access(handle, bs->bh);
 797                if (error)
 798                        goto cleanup;
 799                lock_buffer(bs->bh);
 800
 801                if (header(s->base)->h_refcount == cpu_to_le32(1)) {
 802                        if (ce) {
 803                                mb_cache_entry_free(ce);
 804                                ce = NULL;
 805                        }
 806                        ea_bdebug(bs->bh, "modifying in-place");
 807                        error = ext4_xattr_set_entry(i, s);
 808                        if (!error) {
 809                                if (!IS_LAST_ENTRY(s->first))
 810                                        ext4_xattr_rehash(header(s->base),
 811                                                          s->here);
 812                                ext4_xattr_cache_insert(ext4_mb_cache,
 813                                        bs->bh);
 814                        }
 815                        unlock_buffer(bs->bh);
 816                        if (error == -EIO)
 817                                goto bad_block;
 818                        if (!error)
 819                                error = ext4_handle_dirty_xattr_block(handle,
 820                                                                      inode,
 821                                                                      bs->bh);
 822                        if (error)
 823                                goto cleanup;
 824                        goto inserted;
 825                } else {
 826                        int offset = (char *)s->here - bs->bh->b_data;
 827
 828                        unlock_buffer(bs->bh);
 829                        if (ce) {
 830                                mb_cache_entry_release(ce);
 831                                ce = NULL;
 832                        }
 833                        ea_bdebug(bs->bh, "cloning");
 834                        s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
 835                        error = -ENOMEM;
 836                        if (s->base == NULL)
 837                                goto cleanup;
 838                        memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
 839                        s->first = ENTRY(header(s->base)+1);
 840                        header(s->base)->h_refcount = cpu_to_le32(1);
 841                        s->here = ENTRY(s->base + offset);
 842                        s->end = s->base + bs->bh->b_size;
 843                }
 844        } else {
 845                /* Allocate a buffer where we construct the new block. */
 846                s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
 847                /* assert(header == s->base) */
 848                error = -ENOMEM;
 849                if (s->base == NULL)
 850                        goto cleanup;
 851                header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
 852                header(s->base)->h_blocks = cpu_to_le32(1);
 853                header(s->base)->h_refcount = cpu_to_le32(1);
 854                s->first = ENTRY(header(s->base)+1);
 855                s->here = ENTRY(header(s->base)+1);
 856                s->end = s->base + sb->s_blocksize;
 857        }
 858
 859        error = ext4_xattr_set_entry(i, s);
 860        if (error == -EIO)
 861                goto bad_block;
 862        if (error)
 863                goto cleanup;
 864        if (!IS_LAST_ENTRY(s->first))
 865                ext4_xattr_rehash(header(s->base), s->here);
 866
 867inserted:
 868        if (!IS_LAST_ENTRY(s->first)) {
 869                new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
 870                if (new_bh) {
 871                        /* We found an identical block in the cache. */
 872                        if (new_bh == bs->bh)
 873                                ea_bdebug(new_bh, "keeping");
 874                        else {
 875                                /* The old block is released after updating
 876                                   the inode. */
 877                                error = dquot_alloc_block(inode,
 878                                                EXT4_C2B(EXT4_SB(sb), 1));
 879                                if (error)
 880                                        goto cleanup;
 881                                BUFFER_TRACE(new_bh, "get_write_access");
 882                                error = ext4_journal_get_write_access(handle,
 883                                                                      new_bh);
 884                                if (error)
 885                                        goto cleanup_dquot;
 886                                lock_buffer(new_bh);
 887                                le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
 888                                ea_bdebug(new_bh, "reusing; refcount now=%d",
 889                                        le32_to_cpu(BHDR(new_bh)->h_refcount));
 890                                unlock_buffer(new_bh);
 891                                error = ext4_handle_dirty_xattr_block(handle,
 892                                                                      inode,
 893                                                                      new_bh);
 894                                if (error)
 895                                        goto cleanup_dquot;
 896                        }
 897                        mb_cache_entry_release(ce);
 898                        ce = NULL;
 899                } else if (bs->bh && s->base == bs->bh->b_data) {
 900                        /* We were modifying this block in-place. */
 901                        ea_bdebug(bs->bh, "keeping this block");
 902                        new_bh = bs->bh;
 903                        get_bh(new_bh);
 904                } else {
 905                        /* We need to allocate a new block */
 906                        ext4_fsblk_t goal, block;
 907
 908                        goal = ext4_group_first_block_no(sb,
 909                                                EXT4_I(inode)->i_block_group);
 910
 911                        /* non-extent files can't have physical blocks past 2^32 */
 912                        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 913                                goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 914
 915                        block = ext4_new_meta_blocks(handle, inode, goal, 0,
 916                                                     NULL, &error);
 917                        if (error)
 918                                goto cleanup;
 919
 920                        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 921                                BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
 922
 923                        ea_idebug(inode, "creating block %llu",
 924                                  (unsigned long long)block);
 925
 926                        new_bh = sb_getblk(sb, block);
 927                        if (unlikely(!new_bh)) {
 928                                error = -ENOMEM;
 929getblk_failed:
 930                                ext4_free_blocks(handle, inode, NULL, block, 1,
 931                                                 EXT4_FREE_BLOCKS_METADATA);
 932                                goto cleanup;
 933                        }
 934                        lock_buffer(new_bh);
 935                        error = ext4_journal_get_create_access(handle, new_bh);
 936                        if (error) {
 937                                unlock_buffer(new_bh);
 938                                error = -EIO;
 939                                goto getblk_failed;
 940                        }
 941                        memcpy(new_bh->b_data, s->base, new_bh->b_size);
 942                        set_buffer_uptodate(new_bh);
 943                        unlock_buffer(new_bh);
 944                        ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
 945                        error = ext4_handle_dirty_xattr_block(handle,
 946                                                              inode, new_bh);
 947                        if (error)
 948                                goto cleanup;
 949                }
 950        }
 951
 952        /* Update the inode. */
 953        EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
 954
 955        /* Drop the previous xattr block. */
 956        if (bs->bh && bs->bh != new_bh)
 957                ext4_xattr_release_block(handle, inode, bs->bh);
 958        error = 0;
 959
 960cleanup:
 961        if (ce)
 962                mb_cache_entry_release(ce);
 963        brelse(new_bh);
 964        if (!(bs->bh && s->base == bs->bh->b_data))
 965                kfree(s->base);
 966
 967        return error;
 968
 969cleanup_dquot:
 970        dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
 971        goto cleanup;
 972
 973bad_block:
 974        EXT4_ERROR_INODE(inode, "bad block %llu",
 975                         EXT4_I(inode)->i_file_acl);
 976        goto cleanup;
 977
 978#undef header
 979}
 980
 981int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
 982                          struct ext4_xattr_ibody_find *is)
 983{
 984        struct ext4_xattr_ibody_header *header;
 985        struct ext4_inode *raw_inode;
 986        int error;
 987
 988        if (EXT4_I(inode)->i_extra_isize == 0)
 989                return 0;
 990        raw_inode = ext4_raw_inode(&is->iloc);
 991        header = IHDR(inode, raw_inode);
 992        is->s.base = is->s.first = IFIRST(header);
 993        is->s.here = is->s.first;
 994        is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
 995        if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
 996                error = ext4_xattr_check_names(IFIRST(header), is->s.end,
 997                                               IFIRST(header));
 998                if (error)
 999                        return error;
1000                /* Find the named attribute. */
1001                error = ext4_xattr_find_entry(&is->s.here, i->name_index,
1002                                              i->name, is->s.end -
1003                                              (void *)is->s.base, 0);
1004                if (error && error != -ENODATA)
1005                        return error;
1006                is->s.not_found = error;
1007        }
1008        return 0;
1009}
1010
1011int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
1012                                struct ext4_xattr_info *i,
1013                                struct ext4_xattr_ibody_find *is)
1014{
1015        struct ext4_xattr_ibody_header *header;
1016        struct ext4_xattr_search *s = &is->s;
1017        int error;
1018
1019        if (EXT4_I(inode)->i_extra_isize == 0)
1020                return -ENOSPC;
1021        error = ext4_xattr_set_entry(i, s);
1022        if (error) {
1023                if (error == -ENOSPC &&
1024                    ext4_has_inline_data(inode)) {
1025                        error = ext4_try_to_evict_inline_data(handle, inode,
 1026                                        EXT4_XATTR_LEN(strlen(i->name)) +
 1027                                        EXT4_XATTR_SIZE(i->value_len));
1028                        if (error)
1029                                return error;
1030                        error = ext4_xattr_ibody_find(inode, i, is);
1031                        if (error)
1032                                return error;
1033                        error = ext4_xattr_set_entry(i, s);
1034                }
1035                if (error)
1036                        return error;
1037        }
1038        header = IHDR(inode, ext4_raw_inode(&is->iloc));
1039        if (!IS_LAST_ENTRY(s->first)) {
1040                header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
1041                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
1042        } else {
1043                header->h_magic = cpu_to_le32(0);
1044                ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
1045        }
1046        return 0;
1047}
1048
1049static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
1050                                struct ext4_xattr_info *i,
1051                                struct ext4_xattr_ibody_find *is)
1052{
1053        struct ext4_xattr_ibody_header *header;
1054        struct ext4_xattr_search *s = &is->s;
1055        int error;
1056
1057        if (EXT4_I(inode)->i_extra_isize == 0)
1058                return -ENOSPC;
1059        error = ext4_xattr_set_entry(i, s);
1060        if (error)
1061                return error;
1062        header = IHDR(inode, ext4_raw_inode(&is->iloc));
1063        if (!IS_LAST_ENTRY(s->first)) {
1064                header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
1065                ext4_set_inode_state(inode, EXT4_STATE_XATTR);
1066        } else {
1067                header->h_magic = cpu_to_le32(0);
1068                ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
1069        }
1070        return 0;
1071}
1072
1073/*
1074 * ext4_xattr_set_handle()
1075 *
1076 * Create, replace or remove an extended attribute for this inode.  Value
1077 * is NULL to remove an existing extended attribute, and non-NULL to
1078 * either replace an existing extended attribute, or create a new extended
1079 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
 1080 * specify that an extended attribute must exist and must not exist
 1081 * prior to the call, respectively.
1082 *
1083 * Returns 0, or a negative error number on failure.
1084 */
1085int
1086ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1087                      const char *name, const void *value, size_t value_len,
1088                      int flags)
1089{
1090        struct ext4_xattr_info i = {
1091                .name_index = name_index,
1092                .name = name,
1093                .value = value,
1094                .value_len = value_len,
1095
1096        };
1097        struct ext4_xattr_ibody_find is = {
1098                .s = { .not_found = -ENODATA, },
1099        };
1100        struct ext4_xattr_block_find bs = {
1101                .s = { .not_found = -ENODATA, },
1102        };
1103        unsigned long no_expand;
1104        int error;
1105
1106        if (!name)
1107                return -EINVAL;
1108        if (strlen(name) > 255)
1109                return -ERANGE;
1110        down_write(&EXT4_I(inode)->xattr_sem);
1111        no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
1112        ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
1113
1114        error = ext4_reserve_inode_write(handle, inode, &is.iloc);
1115        if (error)
1116                goto cleanup;
1117
1118        if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
1119                struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
1120                memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
1121                ext4_clear_inode_state(inode, EXT4_STATE_NEW);
1122        }
1123
1124        error = ext4_xattr_ibody_find(inode, &i, &is);
1125        if (error)
1126                goto cleanup;
1127        if (is.s.not_found)
1128                error = ext4_xattr_block_find(inode, &i, &bs);
1129        if (error)
1130                goto cleanup;
1131        if (is.s.not_found && bs.s.not_found) {
1132                error = -ENODATA;
1133                if (flags & XATTR_REPLACE)
1134                        goto cleanup;
1135                error = 0;
1136                if (!value)
1137                        goto cleanup;
1138        } else {
1139                error = -EEXIST;
1140                if (flags & XATTR_CREATE)
1141                        goto cleanup;
1142        }
1143        if (!value) {
1144                if (!is.s.not_found)
1145                        error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1146                else if (!bs.s.not_found)
1147                        error = ext4_xattr_block_set(handle, inode, &i, &bs);
1148        } else {
1149                error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1150                if (!error && !bs.s.not_found) {
1151                        i.value = NULL;
1152                        error = ext4_xattr_block_set(handle, inode, &i, &bs);
1153                } else if (error == -ENOSPC) {
1154                        if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
1155                                error = ext4_xattr_block_find(inode, &i, &bs);
1156                                if (error)
1157                                        goto cleanup;
1158                        }
1159                        error = ext4_xattr_block_set(handle, inode, &i, &bs);
1160                        if (error)
1161                                goto cleanup;
1162                        if (!is.s.not_found) {
1163                                i.value = NULL;
1164                                error = ext4_xattr_ibody_set(handle, inode, &i,
1165                                                             &is);
1166                        }
1167                }
1168        }
1169        if (!error) {
1170                ext4_xattr_update_super_block(handle, inode->i_sb);
1171                inode->i_ctime = ext4_current_time(inode);
1172                if (!value)
1173                        ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1174                error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
1175                /*
1176                 * The bh is consumed by ext4_mark_iloc_dirty, even with
1177                 * error != 0.
1178                 */
1179                is.iloc.bh = NULL;
1180                if (IS_SYNC(inode))
1181                        ext4_handle_sync(handle);
1182        }
1183
1184cleanup:
1185        brelse(is.iloc.bh);
1186        brelse(bs.bh);
1187        if (no_expand == 0)
1188                ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1189        up_write(&EXT4_I(inode)->xattr_sem);
1190        return error;
1191}
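     /*
      * Summary of the flag handling implemented above, plus a hypothetical
      * call (the "trusted.foo" name is only an example):
      *
      *     value != NULL, flags == 0             create or replace
      *     value != NULL, flags == XATTR_CREATE  create, -EEXIST if it exists
      *     value != NULL, flags == XATTR_REPLACE replace, -ENODATA if missing
      *     value == NULL                         remove, -ENODATA only when
      *                                           XATTR_REPLACE is also set
      *
      *     error = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_TRUSTED,
      *                                   "foo", data, data_len, XATTR_CREATE);
      */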
1192
1193/*
1194 * ext4_xattr_set()
1195 *
1196 * Like ext4_xattr_set_handle, but start from an inode. This extended
1197 * attribute modification is a filesystem transaction by itself.
1198 *
1199 * Returns 0, or a negative error number on failure.
1200 */
1201int
1202ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1203               const void *value, size_t value_len, int flags)
1204{
1205        handle_t *handle;
1206        int error, retries = 0;
1207        int credits = ext4_jbd2_credits_xattr(inode);
1208
1209retry:
1210        handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1211        if (IS_ERR(handle)) {
1212                error = PTR_ERR(handle);
1213        } else {
1214                int error2;
1215
1216                error = ext4_xattr_set_handle(handle, inode, name_index, name,
1217                                              value, value_len, flags);
1218                error2 = ext4_journal_stop(handle);
1219                if (error == -ENOSPC &&
1220                    ext4_should_retry_alloc(inode->i_sb, &retries))
1221                        goto retry;
1222                if (error == 0)
1223                        error = error2;
1224        }
1225
1226        return error;
1227}
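     /*
      * Minimal caller sketch (hypothetical; real callers such as the ACL and
      * security code pass their own index, name and value):
      *
      *     error = ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, "foo",
      *                            value, value_len, 0);
      *
      * The journal handle, the credits estimate from ext4_jbd2_credits_xattr()
      * and the retry on ENOSPC are all handled here; code that already runs
      * inside a transaction uses ext4_xattr_set_handle() directly instead.
      */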
1228
1229/*
1230 * Shift the EA entries in the inode to create space for the increased
1231 * i_extra_isize.
1232 */
1233static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
1234                                     int value_offs_shift, void *to,
1235                                     void *from, size_t n, int blocksize)
1236{
1237        struct ext4_xattr_entry *last = entry;
1238        int new_offs;
1239
1240        /* Adjust the value offsets of the entries */
1241        for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1242                if (!last->e_value_block && last->e_value_size) {
1243                        new_offs = le16_to_cpu(last->e_value_offs) +
1244                                                        value_offs_shift;
1245                        BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
1246                                 > blocksize);
1247                        last->e_value_offs = cpu_to_le16(new_offs);
1248                }
1249        }
1250        /* Shift the entries by n bytes */
1251        memmove(to, from, n);
1252}
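     /*
      * Worked example with illustrative numbers: when i_extra_isize grows from
      * 28 to 32 bytes, the caller passes value_offs_shift = 28 - 32 = -4.  The
      * ibody header and the entry descriptors are memmove()d 4 bytes towards
      * the end of the inode while the values stay in place; because
      * e_value_offs is relative to the (now later) start of the entry table,
      * every offset is decreased by 4 so that it still points at the same
      * bytes.
      */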
1253
1254/*
1255 * Expand an inode by new_extra_isize bytes when EAs are present.
1256 * Returns 0 on success or negative error number on failure.
1257 */
1258int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
1259                               struct ext4_inode *raw_inode, handle_t *handle)
1260{
1261        struct ext4_xattr_ibody_header *header;
1262        struct ext4_xattr_entry *entry, *last, *first;
1263        struct buffer_head *bh = NULL;
1264        struct ext4_xattr_ibody_find *is = NULL;
1265        struct ext4_xattr_block_find *bs = NULL;
1266        char *buffer = NULL, *b_entry_name = NULL;
1267        size_t min_offs, free;
1268        int total_ino;
1269        void *base, *start, *end;
1270        int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
1271        int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
1272
1273        down_write(&EXT4_I(inode)->xattr_sem);
1274retry:
1275        if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
1276                up_write(&EXT4_I(inode)->xattr_sem);
1277                return 0;
1278        }
1279
1280        header = IHDR(inode, raw_inode);
1281        entry = IFIRST(header);
1282
1283        /*
1284         * Check if enough free space is available in the inode to shift the
1285         * entries ahead by new_extra_isize.
1286         */
1287
1288        base = start = entry;
1289        end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
1290        min_offs = end - base;
1291        last = entry;
1292        total_ino = sizeof(struct ext4_xattr_ibody_header);
1293
1294        free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
1295        if (free >= new_extra_isize) {
1296                entry = IFIRST(header);
1297                ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
1298                                - new_extra_isize, (void *)raw_inode +
1299                                EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
1300                                (void *)header, total_ino,
1301                                inode->i_sb->s_blocksize);
1302                EXT4_I(inode)->i_extra_isize = new_extra_isize;
1303                error = 0;
1304                goto cleanup;
1305        }
1306
1307        /*
1308         * Enough free space isn't available in the inode, check if
1309         * EA block can hold new_extra_isize bytes.
1310         */
1311        if (EXT4_I(inode)->i_file_acl) {
1312                bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1313                error = -EIO;
1314                if (!bh)
1315                        goto cleanup;
1316                if (ext4_xattr_check_block(inode, bh)) {
1317                        EXT4_ERROR_INODE(inode, "bad block %llu",
1318                                         EXT4_I(inode)->i_file_acl);
1319                        error = -EIO;
1320                        goto cleanup;
1321                }
1322                base = BHDR(bh);
1323                first = BFIRST(bh);
1324                end = bh->b_data + bh->b_size;
1325                min_offs = end - base;
1326                free = ext4_xattr_free_space(first, &min_offs, base, NULL);
1327                if (free < new_extra_isize) {
1328                        if (!tried_min_extra_isize && s_min_extra_isize) {
1329                                tried_min_extra_isize++;
1330                                new_extra_isize = s_min_extra_isize;
1331                                brelse(bh);
1332                                goto retry;
1333                        }
1334                        error = -ENOSPC;
1335                        goto cleanup;
1336                }
1337        } else {
1338                free = inode->i_sb->s_blocksize;
1339        }
1340
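            /*
             * Evict in-inode attributes into the EA block, one entry per
             * iteration, until the space requested by the caller has been
             * freed inside the inode.
             */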
1341        while (new_extra_isize > 0) {
1342                size_t offs, size, entry_size;
1343                struct ext4_xattr_entry *small_entry = NULL;
1344                struct ext4_xattr_info i = {
1345                        .value = NULL,
1346                        .value_len = 0,
1347                };
1348                unsigned int total_size;  /* EA entry size + value size */
1349                unsigned int shift_bytes; /* No. of bytes to shift EAs by */
1350                unsigned int min_total_size = ~0U;
1351
1352                is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
1353                bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
1354                if (!is || !bs) {
1355                        error = -ENOMEM;
1356                        goto cleanup;
1357                }
1358
1359                is->s.not_found = -ENODATA;
1360                bs->s.not_found = -ENODATA;
1361                is->iloc.bh = NULL;
1362                bs->bh = NULL;
1363
1364                last = IFIRST(header);
1365                /* Find the entry best suited to be pushed into the EA block */
1366                entry = NULL;
1367                for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1368                        total_size =
1369                                EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
1370                                EXT4_XATTR_LEN(last->e_name_len);
1371                        if (total_size <= free && total_size < min_total_size) {
1372                                if (total_size < new_extra_isize) {
1373                                        small_entry = last;
1374                                } else {
1375                                        entry = last;
1376                                        min_total_size = total_size;
1377                                }
1378                        }
1379                }
1380
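                    /*
                     * "entry" is now the smallest attribute that fits in the
                     * EA block and alone frees at least new_extra_isize bytes;
                     * "small_entry" is a fallback that frees less than that.
                     */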
1381                if (entry == NULL) {
1382                        if (small_entry) {
1383                                entry = small_entry;
1384                        } else {
1385                                if (!tried_min_extra_isize &&
1386                                    s_min_extra_isize) {
1387                                        tried_min_extra_isize++;
1388                                        new_extra_isize = s_min_extra_isize;
1389                                        kfree(is); is = NULL;
1390                                        kfree(bs); bs = NULL;
1391                                        brelse(bh);
1392                                        goto retry;
1393                                }
1394                                error = -ENOSPC;
1395                                goto cleanup;
1396                        }
1397                }
1398                offs = le16_to_cpu(entry->e_value_offs);
1399                size = le32_to_cpu(entry->e_value_size);
1400                entry_size = EXT4_XATTR_LEN(entry->e_name_len);
1401                i.name_index = entry->e_name_index;
1402                buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
1403                b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
1404                if (!buffer || !b_entry_name) {
1405                        error = -ENOMEM;
1406                        goto cleanup;
1407                }
1408                /* Save the entry name and the entry value */
1409                memcpy(buffer, (void *)IFIRST(header) + offs,
1410                       EXT4_XATTR_SIZE(size));
1411                memcpy(b_entry_name, entry->e_name, entry->e_name_len);
1412                b_entry_name[entry->e_name_len] = '\0';
1413                i.name = b_entry_name;
1414
1415                error = ext4_get_inode_loc(inode, &is->iloc);
1416                if (error)
1417                        goto cleanup;
1418
1419                error = ext4_xattr_ibody_find(inode, &i, is);
1420                if (error)
1421                        goto cleanup;
1422
1423                /* Remove the chosen entry from the inode */
1424                error = ext4_xattr_ibody_set(handle, inode, &i, is);
1425                if (error)
1426                        goto cleanup;
1427
1428                entry = IFIRST(header);
1429                if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
1430                        shift_bytes = new_extra_isize;
1431                else
1432                        shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
1433                /* Adjust the offsets and shift the remaining entries ahead */
1434                ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
1435                        shift_bytes, (void *)raw_inode +
1436                        EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
1437                        (void *)header, total_ino - entry_size,
1438                        inode->i_sb->s_blocksize);
1439
1440                extra_isize += shift_bytes;
1441                new_extra_isize -= shift_bytes;
1442                EXT4_I(inode)->i_extra_isize = extra_isize;
1443
1444                i.name = b_entry_name;
1445                i.value = buffer;
1446                i.value_len = size;
1447                error = ext4_xattr_block_find(inode, &i, bs);
1448                if (error)
1449                        goto cleanup;
1450
1451                /* Add the entry that was removed from the inode into the EA block */
1452                error = ext4_xattr_block_set(handle, inode, &i, bs);
1453                if (error)
1454                        goto cleanup;
1455                kfree(b_entry_name);
1456                kfree(buffer);
1457                b_entry_name = NULL;
1458                buffer = NULL;
1459                brelse(is->iloc.bh);
1460                kfree(is);
                    /* Release the EA block buffer obtained by ext4_xattr_block_find(). */
                    brelse(bs->bh);
1461                kfree(bs);
1462        }
1463        brelse(bh);
1464        up_write(&EXT4_I(inode)->xattr_sem);
1465        return 0;
1466
1467cleanup:
1468        kfree(b_entry_name);
1469        kfree(buffer);
1470        if (is)
1471                brelse(is->iloc.bh);
1472        kfree(is);
            if (bs)
                    brelse(bs->bh);
1473        kfree(bs);
1474        brelse(bh);
1475        up_write(&EXT4_I(inode)->xattr_sem);
1476        return error;
1477}
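    /*
     * Caller context, as a hedged sketch (the caller lives outside this file
     * and details may differ): ext4_expand_extra_isize_ea() is expected to be
     * reached from ext4_mark_inode_dirty() via ext4_expand_extra_isize() in
     * fs/ext4/inode.c when i_extra_isize is below sbi->s_want_extra_isize,
     * roughly:
     *
     *      if (ext4_test_inode_state(inode, EXT4_STATE_XATTR))
     *              error = ext4_expand_extra_isize_ea(inode,
     *                                                 sbi->s_want_extra_isize,
     *                                                 ext4_raw_inode(&iloc),
     *                                                 handle);
     */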
1478
1479
1480
1481/*
1482 * ext4_xattr_delete_inode()
1483 *
1484 * Free extended attribute resources associated with this inode. This
1485 * is called immediately before an inode is freed. We have exclusive
1486 * access to the inode.
1487 */
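    /*
     * The EA block is released only if it carries the xattr magic and
     * h_blocks == 1 (ext4 EA blocks always span a single disk block);
     * ext4_xattr_release_block(), defined earlier in this file, drops the
     * block's refcount and frees the block once nothing references it.
     */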
1488void
1489ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1490{
1491        struct buffer_head *bh = NULL;
1492
1493        if (!EXT4_I(inode)->i_file_acl)
1494                goto cleanup;
1495        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1496        if (!bh) {
1497                EXT4_ERROR_INODE(inode, "block %llu read error",
1498                                 EXT4_I(inode)->i_file_acl);
1499                goto cleanup;
1500        }
1501        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1502            BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1503                EXT4_ERROR_INODE(inode, "bad block %llu",
1504                                 EXT4_I(inode)->i_file_acl);
1505                goto cleanup;
1506        }
1507        ext4_xattr_release_block(handle, inode, bh);
1508        EXT4_I(inode)->i_file_acl = 0;
1509
1510cleanup:
1511        brelse(bh);
1512}
1513
1514/*
1515 * ext4_xattr_put_super()
1516 *
1517 * This is called when a file system is unmounted.
1518 */
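    /*
     * Shrinking the mbcache against sb->s_bdev drops the cache entries that
     * were created for this device's EA blocks, so they do not linger after
     * the filesystem goes away.
     */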
1519void
1520ext4_xattr_put_super(struct super_block *sb)
1521{
1522        mb_cache_shrink(sb->s_bdev);
1523}
1524
1525/*
1526 * ext4_xattr_cache_insert()
1527 *
1528 * Create a new entry in the extended attribute cache, and insert
1529 * it unless such an entry is already in the cache.
1530 *
1531 * Errors are not returned; on failure the block simply is not cached.
1532 */
1533static void
1534ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
1535{
1536        __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
1537        struct mb_cache_entry *ce;
1538        int error;
1539
1540        ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
1541        if (!ce) {
1542                ea_bdebug(bh, "out of memory");
1543                return;
1544        }
1545        error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1546        if (error) {
1547                mb_cache_entry_free(ce);
1548                if (error == -EBUSY) {
1549                        ea_bdebug(bh, "already in cache");
1550                        error = 0;
1551                }
1552        } else {
1553                ea_bdebug(bh, "inserting [%x]", (int)hash);
1554                mb_cache_entry_release(ce);
1555        }
1556}
1557
1558/*
1559 * ext4_xattr_cmp()
1560 *
1561 * Compare two extended attribute blocks for equality.
1562 *
1563 * Returns 0 if the blocks are equal, 1 if they differ, and
1564 * a negative error number on errors.
1565 */
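    /*
     * The comparison is positional: both blocks are walked entry by entry in
     * parallel, so they only compare equal when their entries are stored in
     * the same order.  A nonzero e_value_block is treated as corruption
     * (-EIO), since ext4 never stores attribute values in a separate block.
     */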
1566static int
1567ext4_xattr_cmp(struct ext4_xattr_header *header1,
1568               struct ext4_xattr_header *header2)
1569{
1570        struct ext4_xattr_entry *entry1, *entry2;
1571
1572        entry1 = ENTRY(header1+1);
1573        entry2 = ENTRY(header2+1);
1574        while (!IS_LAST_ENTRY(entry1)) {
1575                if (IS_LAST_ENTRY(entry2))
1576                        return 1;
1577                if (entry1->e_hash != entry2->e_hash ||
1578                    entry1->e_name_index != entry2->e_name_index ||
1579                    entry1->e_name_len != entry2->e_name_len ||
1580                    entry1->e_value_size != entry2->e_value_size ||
1581                    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1582                        return 1;
1583                if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1584                        return -EIO;
1585                if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1586                           (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1587                           le32_to_cpu(entry1->e_value_size)))
1588                        return 1;
1589
1590                entry1 = EXT4_XATTR_NEXT(entry1);
1591                entry2 = EXT4_XATTR_NEXT(entry2);
1592        }
1593        if (!IS_LAST_ENTRY(entry2))
1594                return 1;
1595        return 0;
1596}
1597
1598/*
1599 * ext4_xattr_cache_find()
1600 *
1601 * Find an identical extended attribute block.
1602 *
1603 * Returns a pointer to the block found, or NULL if such a block was
1604 * not found or an error occurred.
1605 */
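    /*
     * Summary of the lookup below: candidates are found by block hash in the
     * cache returned by EXT4_GET_MB_CACHE(), then re-read from disk and
     * verified byte for byte with ext4_xattr_cmp(), since unrelated blocks
     * can share a hash.  Blocks already at EXT4_XATTR_REFCOUNT_MAX are
     * skipped so their refcount cannot overflow, and -EAGAIN from the cache
     * restarts the search.
     */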
1606static struct buffer_head *
1607ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1608                      struct mb_cache_entry **pce)
1609{
1610        __u32 hash = le32_to_cpu(header->h_hash);
1611        struct mb_cache_entry *ce;
1612        struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
1613
1614        if (!header->h_hash)
1615                return NULL;  /* never share */
1616        ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1617again:
1618        ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
1619                                       hash);
1620        while (ce) {
1621                struct buffer_head *bh;
1622
1623                if (IS_ERR(ce)) {
1624                        if (PTR_ERR(ce) == -EAGAIN)
1625                                goto again;
1626                        break;
1627                }
1628                bh = sb_bread(inode->i_sb, ce->e_block);
1629                if (!bh) {
1630                        EXT4_ERROR_INODE(inode, "block %lu read error",
1631                                         (unsigned long) ce->e_block);
1632                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
1633                                EXT4_XATTR_REFCOUNT_MAX) {
1634                        ea_idebug(inode, "block %lu refcount %d>=%d",
1635                                  (unsigned long) ce->e_block,
1636                                  le32_to_cpu(BHDR(bh)->h_refcount),
1637                                  EXT4_XATTR_REFCOUNT_MAX);
1638                } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
1639                        *pce = ce;
1640                        return bh;
1641                }
1642                brelse(bh);
1643                ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
1644        }
1645        return NULL;
1646}
1647
1648#define NAME_HASH_SHIFT 5
1649#define VALUE_HASH_SHIFT 16
1650
1651/*
1652 * ext4_xattr_hash_entry()
1653 *
1654 * Compute the hash of an extended attribute.
1655 */
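    /*
     * The entry hash is a rotate-and-xor: each name byte is folded in with a
     * 5-bit rotation of the 32-bit hash, and an inline, non-empty value is
     * folded in as little-endian 32-bit words (padded to a multiple of four
     * bytes) with a 16-bit rotation.  A purely illustrative example for the
     * two-byte name "ab" with an empty value:
     *
     *      hash = 0
     *      'a' (0x61):  hash = (0x00 << 5) ^ (0x00 >> 27) ^ 0x61 = 0x61
     *      'b' (0x62):  hash = (0x61 << 5) ^ (0x61 >> 27) ^ 0x62 = 0xc42
     */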
1656static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1657                                         struct ext4_xattr_entry *entry)
1658{
1659        __u32 hash = 0;
1660        char *name = entry->e_name;
1661        int n;
1662
1663        for (n = 0; n < entry->e_name_len; n++) {
1664                hash = (hash << NAME_HASH_SHIFT) ^
1665                       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1666                       *name++;
1667        }
1668
1669        if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1670                __le32 *value = (__le32 *)((char *)header +
1671                        le16_to_cpu(entry->e_value_offs));
1672                for (n = (le32_to_cpu(entry->e_value_size) +
1673                     EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
1674                        hash = (hash << VALUE_HASH_SHIFT) ^
1675                               (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1676                               le32_to_cpu(*value++);
1677                }
1678        }
1679        entry->e_hash = cpu_to_le32(hash);
1680}
1681
1682#undef NAME_HASH_SHIFT
1683#undef VALUE_HASH_SHIFT
1684
1685#define BLOCK_HASH_SHIFT 16
1686
1687/*
1688 * ext4_xattr_rehash()
1689 *
1690 * Re-compute the extended attribute hash value after an entry has changed.
1691 */
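    /*
     * The block hash folds the per-entry hashes together with a 16-bit
     * rotation.  If any entry still has a zero e_hash, the block hash is
     * forced to zero, and ext4_xattr_cache_find() above then refuses to
     * share such a block.
     */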
1692static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1693                              struct ext4_xattr_entry *entry)
1694{
1695        struct ext4_xattr_entry *here;
1696        __u32 hash = 0;
1697
1698        ext4_xattr_hash_entry(header, entry);
1699        here = ENTRY(header+1);
1700        while (!IS_LAST_ENTRY(here)) {
1701                if (!here->e_hash) {
1702                        /* Block is not shared if an entry's hash value == 0 */
1703                        hash = 0;
1704                        break;
1705                }
1706                hash = (hash << BLOCK_HASH_SHIFT) ^
1707                       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1708                       le32_to_cpu(here->e_hash);
1709                here = EXT4_XATTR_NEXT(here);
1710        }
1711        header->h_hash = cpu_to_le32(hash);
1712}
1713
1714#undef BLOCK_HASH_SHIFT
1715
1716#define HASH_BUCKET_BITS        10
1717
1718struct mb_cache *
1719ext4_xattr_create_cache(char *name)
1720{
1721        return mb_cache_create(name, HASH_BUCKET_BITS);
1722}
1723
1724void ext4_xattr_destroy_cache(struct mb_cache *cache)
1725{
1726        if (cache)
1727                mb_cache_destroy(cache);
1728}
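    /*
     * A hedged usage note: in this kernel each mounted filesystem is expected
     * to create its own cache with ext4_xattr_create_cache() at mount time
     * and release it with ext4_xattr_destroy_cache() at unmount, with
     * EXT4_GET_MB_CACHE() (used by ext4_xattr_cache_find() above) returning
     * that per-superblock cache.  The callers live outside this listing and
     * may differ between kernel versions.
     */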
1729
1730