linux/fs/ext2/xattr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * linux/fs/ext2/xattr.c
   4 *
   5 * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
   6 *
   7 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
   8 * Extended attributes for symlinks and special files added per
   9 *  suggestion of Luka Renko <luka.renko@hermes.si>.
  10 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
  11 *  Red Hat Inc.
  12 *
  13 */
  14
  15/*
  16 * Extended attributes are stored on disk blocks allocated outside of
  17 * any inode. The i_file_acl field is then made to point to this allocated
  18 * block. If all extended attributes of an inode are identical, these
  19 * inodes may share the same extended attribute block. Such situations
  20 * are automatically detected by keeping a cache of recent attribute block
  21 * numbers and hashes over the block's contents in memory.
  22 *
  23 *
  24 * Extended attribute block layout:
  25 *
  26 *   +------------------+
  27 *   | header           |
  28 *   | entry 1          | |
  29 *   | entry 2          | | growing downwards
  30 *   | entry 3          | v
  31 *   | four null bytes  |
  32 *   | . . .            |
  33 *   | value 1          | ^
  34 *   | value 3          | | growing upwards
  35 *   | value 2          | |
  36 *   +------------------+
  37 *
  38 * The block header is followed by multiple entry descriptors. These entry
  39 * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
  40 * byte boundaries. The entry descriptors are sorted by attribute name,
  41 * so that two extended attribute blocks can be compared efficiently.
  42 *
  43 * Attribute values are aligned to the end of the block, stored in
  44 * no specific order. They are also padded to EXT2_XATTR_PAD byte
  45 * boundaries. No additional gaps are left between them.
  46 *
  47 * Locking strategy
  48 * ----------------
  49 * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
  50 * EA blocks are only changed if they are exclusive to an inode, so
  51 * holding xattr_sem also means that nothing but the EA block's reference
  52 * count will change. Multiple writers to an EA block are synchronized
  53 * by the bh lock. No more than a single bh lock is held at any time
  54 * to avoid deadlocks.
  55 */
  56
  57#include <linux/buffer_head.h>
  58#include <linux/init.h>
  59#include <linux/slab.h>
  60#include <linux/mbcache.h>
  61#include <linux/quotaops.h>
  62#include <linux/rwsem.h>
  63#include <linux/security.h>
  64#include "ext2.h"
  65#include "xattr.h"
  66#include "acl.h"
  67
  68#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
  69#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
  70#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
  71#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
  72
  73#ifdef EXT2_XATTR_DEBUG
  74# define ea_idebug(inode, f...) do { \
  75                printk(KERN_DEBUG "inode %s:%ld: ", \
  76                        inode->i_sb->s_id, inode->i_ino); \
  77                printk(f); \
  78                printk("\n"); \
  79        } while (0)
  80# define ea_bdebug(bh, f...) do { \
  81                printk(KERN_DEBUG "block %pg:%lu: ", \
  82                        bh->b_bdev, (unsigned long) bh->b_blocknr); \
  83                printk(f); \
  84                printk("\n"); \
  85        } while (0)
  86#else
  87# define ea_idebug(f...)
  88# define ea_bdebug(f...)
  89#endif
  90
  91static int ext2_xattr_set2(struct inode *, struct buffer_head *,
  92                           struct ext2_xattr_header *);
  93
  94static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
  95static struct buffer_head *ext2_xattr_cache_find(struct inode *,
  96                                                 struct ext2_xattr_header *);
  97static void ext2_xattr_rehash(struct ext2_xattr_header *,
  98                              struct ext2_xattr_entry *);
  99
 100static const struct xattr_handler *ext2_xattr_handler_map[] = {
 101        [EXT2_XATTR_INDEX_USER]              = &ext2_xattr_user_handler,
 102#ifdef CONFIG_EXT2_FS_POSIX_ACL
 103        [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
 104        [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
 105#endif
 106        [EXT2_XATTR_INDEX_TRUSTED]           = &ext2_xattr_trusted_handler,
 107#ifdef CONFIG_EXT2_FS_SECURITY
 108        [EXT2_XATTR_INDEX_SECURITY]          = &ext2_xattr_security_handler,
 109#endif
 110};
 111
 112const struct xattr_handler *ext2_xattr_handlers[] = {
 113        &ext2_xattr_user_handler,
 114        &ext2_xattr_trusted_handler,
 115#ifdef CONFIG_EXT2_FS_POSIX_ACL
 116        &posix_acl_access_xattr_handler,
 117        &posix_acl_default_xattr_handler,
 118#endif
 119#ifdef CONFIG_EXT2_FS_SECURITY
 120        &ext2_xattr_security_handler,
 121#endif
 122        NULL
 123};
 124
 125#define EA_BLOCK_CACHE(inode)   (EXT2_SB(inode->i_sb)->s_ea_block_cache)
 126
 127static inline const struct xattr_handler *
 128ext2_xattr_handler(int name_index)
 129{
 130        const struct xattr_handler *handler = NULL;
 131
 132        if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map))
 133                handler = ext2_xattr_handler_map[name_index];
 134        return handler;
 135}
 136
 137static bool
 138ext2_xattr_header_valid(struct ext2_xattr_header *header)
 139{
 140        if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
 141            header->h_blocks != cpu_to_le32(1))
 142                return false;
 143
 144        return true;
 145}
 146
 147static bool
 148ext2_xattr_entry_valid(struct ext2_xattr_entry *entry,
 149                       char *end, size_t end_offs)
 150{
 151        struct ext2_xattr_entry *next;
 152        size_t size;
 153
 154        next = EXT2_XATTR_NEXT(entry);
 155        if ((char *)next >= end)
 156                return false;
 157
 158        if (entry->e_value_block != 0)
 159                return false;
 160
 161        size = le32_to_cpu(entry->e_value_size);
 162        if (size > end_offs ||
 163            le16_to_cpu(entry->e_value_offs) + size > end_offs)
 164                return false;
 165
 166        return true;
 167}
 168
 169static int
 170ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name,
 171                     struct ext2_xattr_entry *entry)
 172{
 173        int cmp;
 174
 175        cmp = name_index - entry->e_name_index;
 176        if (!cmp)
 177                cmp = name_len - entry->e_name_len;
 178        if (!cmp)
 179                cmp = memcmp(name, entry->e_name, name_len);
 180
 181        return cmp;
 182}
 183
 184/*
 185 * ext2_xattr_get()
 186 *
 187 * Copy an extended attribute into the buffer
 188 * provided, or compute the buffer size required.
 189 * Buffer is NULL to compute the size of the buffer required.
 190 *
 191 * Returns a negative error number on failure, or the number of bytes
 192 * used / required on success.
 193 */
 194int
 195ext2_xattr_get(struct inode *inode, int name_index, const char *name,
 196               void *buffer, size_t buffer_size)
 197{
 198        struct buffer_head *bh = NULL;
 199        struct ext2_xattr_entry *entry;
 200        size_t name_len, size;
 201        char *end;
 202        int error, not_found;
 203        struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 204
 205        ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
 206                  name_index, name, buffer, (long)buffer_size);
 207
 208        if (name == NULL)
 209                return -EINVAL;
 210        name_len = strlen(name);
 211        if (name_len > 255)
 212                return -ERANGE;
 213
 214        down_read(&EXT2_I(inode)->xattr_sem);
 215        error = -ENODATA;
 216        if (!EXT2_I(inode)->i_file_acl)
 217                goto cleanup;
 218        ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
 219        bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
 220        error = -EIO;
 221        if (!bh)
 222                goto cleanup;
 223        ea_bdebug(bh, "b_count=%d, refcount=%d",
 224                atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
 225        end = bh->b_data + bh->b_size;
 226        if (!ext2_xattr_header_valid(HDR(bh))) {
 227bad_block:
 228                ext2_error(inode->i_sb, "ext2_xattr_get",
 229                        "inode %ld: bad block %d", inode->i_ino,
 230                        EXT2_I(inode)->i_file_acl);
 231                error = -EIO;
 232                goto cleanup;
 233        }
 234
 235        /* find named attribute */
 236        entry = FIRST_ENTRY(bh);
 237        while (!IS_LAST_ENTRY(entry)) {
 238                if (!ext2_xattr_entry_valid(entry, end,
 239                    inode->i_sb->s_blocksize))
 240                        goto bad_block;
 241
 242                not_found = ext2_xattr_cmp_entry(name_index, name_len, name,
 243                                                 entry);
 244                if (!not_found)
 245                        goto found;
 246                if (not_found < 0)
 247                        break;
 248
 249                entry = EXT2_XATTR_NEXT(entry);
 250        }
 251        if (ext2_xattr_cache_insert(ea_block_cache, bh))
 252                ea_idebug(inode, "cache insert failed");
 253        error = -ENODATA;
 254        goto cleanup;
 255found:
 256        size = le32_to_cpu(entry->e_value_size);
 257        if (ext2_xattr_cache_insert(ea_block_cache, bh))
 258                ea_idebug(inode, "cache insert failed");
 259        if (buffer) {
 260                error = -ERANGE;
 261                if (size > buffer_size)
 262                        goto cleanup;
 263                /* return value of attribute */
 264                memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
 265                        size);
 266        }
 267        error = size;
 268
 269cleanup:
 270        brelse(bh);
 271        up_read(&EXT2_I(inode)->xattr_sem);
 272
 273        return error;
 274}
 275
 276/*
 277 * ext2_xattr_list()
 278 *
 279 * Copy a list of attribute names into the buffer
 280 * provided, or compute the buffer size required.
 281 * Buffer is NULL to compute the size of the buffer required.
 282 *
 283 * Returns a negative error number on failure, or the number of bytes
 284 * used / required on success.
 285 */
 286static int
 287ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 288{
 289        struct inode *inode = d_inode(dentry);
 290        struct buffer_head *bh = NULL;
 291        struct ext2_xattr_entry *entry;
 292        char *end;
 293        size_t rest = buffer_size;
 294        int error;
 295        struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 296
 297        ea_idebug(inode, "buffer=%p, buffer_size=%ld",
 298                  buffer, (long)buffer_size);
 299
 300        down_read(&EXT2_I(inode)->xattr_sem);
 301        error = 0;
 302        if (!EXT2_I(inode)->i_file_acl)
 303                goto cleanup;
 304        ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
 305        bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
 306        error = -EIO;
 307        if (!bh)
 308                goto cleanup;
 309        ea_bdebug(bh, "b_count=%d, refcount=%d",
 310                atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
 311        end = bh->b_data + bh->b_size;
 312        if (!ext2_xattr_header_valid(HDR(bh))) {
 313bad_block:
 314                ext2_error(inode->i_sb, "ext2_xattr_list",
 315                        "inode %ld: bad block %d", inode->i_ino,
 316                        EXT2_I(inode)->i_file_acl);
 317                error = -EIO;
 318                goto cleanup;
 319        }
 320
 321        /* check the on-disk data structure */
 322        entry = FIRST_ENTRY(bh);
 323        while (!IS_LAST_ENTRY(entry)) {
 324                if (!ext2_xattr_entry_valid(entry, end,
 325                    inode->i_sb->s_blocksize))
 326                        goto bad_block;
 327                entry = EXT2_XATTR_NEXT(entry);
 328        }
 329        if (ext2_xattr_cache_insert(ea_block_cache, bh))
 330                ea_idebug(inode, "cache insert failed");
 331
 332        /* list the attribute names */
 333        for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
 334             entry = EXT2_XATTR_NEXT(entry)) {
 335                const struct xattr_handler *handler =
 336                        ext2_xattr_handler(entry->e_name_index);
 337
 338                if (handler && (!handler->list || handler->list(dentry))) {
 339                        const char *prefix = handler->prefix ?: handler->name;
 340                        size_t prefix_len = strlen(prefix);
 341                        size_t size = prefix_len + entry->e_name_len + 1;
 342
 343                        if (buffer) {
 344                                if (size > rest) {
 345                                        error = -ERANGE;
 346                                        goto cleanup;
 347                                }
 348                                memcpy(buffer, prefix, prefix_len);
 349                                buffer += prefix_len;
 350                                memcpy(buffer, entry->e_name, entry->e_name_len);
 351                                buffer += entry->e_name_len;
 352                                *buffer++ = 0;
 353                        }
 354                        rest -= size;
 355                }
 356        }
 357        error = buffer_size - rest;  /* total size */
 358
 359cleanup:
 360        brelse(bh);
 361        up_read(&EXT2_I(inode)->xattr_sem);
 362
 363        return error;
 364}
 365
 366/*
 367 * Inode operation listxattr()
 368 *
 369 * d_inode(dentry)->i_mutex: don't care
 370 */
 371ssize_t
 372ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 373{
 374        return ext2_xattr_list(dentry, buffer, size);
 375}
 376
 377/*
 378 * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
 379 * not set, set it.
 380 */
 381static void ext2_xattr_update_super_block(struct super_block *sb)
 382{
 383        if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
 384                return;
 385
 386        spin_lock(&EXT2_SB(sb)->s_lock);
 387        ext2_update_dynamic_rev(sb);
 388        EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
 389        spin_unlock(&EXT2_SB(sb)->s_lock);
 390        mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
 391}
 392
 393/*
 394 * ext2_xattr_set()
 395 *
 396 * Create, replace or remove an extended attribute for this inode.  Value
 397 * is NULL to remove an existing extended attribute, and non-NULL to
 398 * either replace an existing extended attribute, or create a new extended
 399 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
 400 * specify that an extended attribute must exist and must not exist
 401 * previous to the call, respectively.
 402 *
 403 * Returns 0, or a negative error number on failure.
 404 */
 405int
 406ext2_xattr_set(struct inode *inode, int name_index, const char *name,
 407               const void *value, size_t value_len, int flags)
 408{
 409        struct super_block *sb = inode->i_sb;
 410        struct buffer_head *bh = NULL;
 411        struct ext2_xattr_header *header = NULL;
 412        struct ext2_xattr_entry *here = NULL, *last = NULL;
 413        size_t name_len, free, min_offs = sb->s_blocksize;
 414        int not_found = 1, error;
 415        char *end;
 416        
 417        /*
 418         * header -- Points either into bh, or to a temporarily
 419         *           allocated buffer.
 420         * here -- The named entry found, or the place for inserting, within
 421         *         the block pointed to by header.
 422         * last -- Points right after the last named entry within the block
 423         *         pointed to by header.
 424         * min_offs -- The offset of the first value (values are aligned
 425         *             towards the end of the block).
 426         * end -- Points right after the block pointed to by header.
 427         */
 428        
 429        ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
 430                  name_index, name, value, (long)value_len);
 431
 432        if (value == NULL)
 433                value_len = 0;
 434        if (name == NULL)
 435                return -EINVAL;
 436        name_len = strlen(name);
 437        if (name_len > 255 || value_len > sb->s_blocksize)
 438                return -ERANGE;
 439        down_write(&EXT2_I(inode)->xattr_sem);
 440        if (EXT2_I(inode)->i_file_acl) {
 441                /* The inode already has an extended attribute block. */
 442                bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
 443                error = -EIO;
 444                if (!bh)
 445                        goto cleanup;
 446                ea_bdebug(bh, "b_count=%d, refcount=%d",
 447                        atomic_read(&(bh->b_count)),
 448                        le32_to_cpu(HDR(bh)->h_refcount));
 449                header = HDR(bh);
 450                end = bh->b_data + bh->b_size;
 451                if (!ext2_xattr_header_valid(header)) {
 452bad_block:
 453                        ext2_error(sb, "ext2_xattr_set",
 454                                "inode %ld: bad block %d", inode->i_ino, 
 455                                   EXT2_I(inode)->i_file_acl);
 456                        error = -EIO;
 457                        goto cleanup;
 458                }
 459                /*
 460                 * Find the named attribute. If not found, 'here' will point
 461                 * to entry where the new attribute should be inserted to
 462                 * maintain sorting.
 463                 */
 464                last = FIRST_ENTRY(bh);
 465                while (!IS_LAST_ENTRY(last)) {
 466                        if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize))
 467                                goto bad_block;
 468                        if (last->e_value_size) {
 469                                size_t offs = le16_to_cpu(last->e_value_offs);
 470                                if (offs < min_offs)
 471                                        min_offs = offs;
 472                        }
 473                        if (not_found > 0) {
 474                                not_found = ext2_xattr_cmp_entry(name_index,
 475                                                                 name_len,
 476                                                                 name, last);
 477                                if (not_found <= 0)
 478                                        here = last;
 479                        }
 480                        last = EXT2_XATTR_NEXT(last);
 481                }
 482                if (not_found > 0)
 483                        here = last;
 484
 485                /* Check whether we have enough space left. */
 486                free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
 487        } else {
 488                /* We will use a new extended attribute block. */
 489                free = sb->s_blocksize -
 490                        sizeof(struct ext2_xattr_header) - sizeof(__u32);
 491        }
 492
 493        if (not_found) {
 494                /* Request to remove a nonexistent attribute? */
 495                error = -ENODATA;
 496                if (flags & XATTR_REPLACE)
 497                        goto cleanup;
 498                error = 0;
 499                if (value == NULL)
 500                        goto cleanup;
 501        } else {
 502                /* Request to create an existing attribute? */
 503                error = -EEXIST;
 504                if (flags & XATTR_CREATE)
 505                        goto cleanup;
 506                free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size));
 507                free += EXT2_XATTR_LEN(name_len);
 508        }
 509        error = -ENOSPC;
 510        if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len))
 511                goto cleanup;
 512
 513        /* Here we know that we can set the new attribute. */
 514
 515        if (header) {
 516                /* assert(header == HDR(bh)); */
 517                lock_buffer(bh);
 518                if (header->h_refcount == cpu_to_le32(1)) {
 519                        __u32 hash = le32_to_cpu(header->h_hash);
 520
 521                        ea_bdebug(bh, "modifying in-place");
 522                        /*
 523                         * This must happen under buffer lock for
 524                         * ext2_xattr_set2() to reliably detect modified block
 525                         */
 526                        mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
 527                                              bh->b_blocknr);
 528
 529                        /* keep the buffer locked while modifying it. */
 530                } else {
 531                        int offset;
 532
 533                        unlock_buffer(bh);
 534                        ea_bdebug(bh, "cloning");
 535                        header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
 536                        error = -ENOMEM;
 537                        if (header == NULL)
 538                                goto cleanup;
 539                        header->h_refcount = cpu_to_le32(1);
 540
 541                        offset = (char *)here - bh->b_data;
 542                        here = ENTRY((char *)header + offset);
 543                        offset = (char *)last - bh->b_data;
 544                        last = ENTRY((char *)header + offset);
 545                }
 546        } else {
 547                /* Allocate a buffer where we construct the new block. */
 548                header = kzalloc(sb->s_blocksize, GFP_KERNEL);
 549                error = -ENOMEM;
 550                if (header == NULL)
 551                        goto cleanup;
 552                end = (char *)header + sb->s_blocksize;
 553                header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
 554                header->h_blocks = header->h_refcount = cpu_to_le32(1);
 555                last = here = ENTRY(header+1);
 556        }
 557
 558        /* Iff we are modifying the block in-place, bh is locked here. */
 559
 560        if (not_found) {
 561                /* Insert the new name. */
 562                size_t size = EXT2_XATTR_LEN(name_len);
 563                size_t rest = (char *)last - (char *)here;
 564                memmove((char *)here + size, here, rest);
 565                memset(here, 0, size);
 566                here->e_name_index = name_index;
 567                here->e_name_len = name_len;
 568                memcpy(here->e_name, name, name_len);
 569        } else {
 570                if (here->e_value_size) {
 571                        char *first_val = (char *)header + min_offs;
 572                        size_t offs = le16_to_cpu(here->e_value_offs);
 573                        char *val = (char *)header + offs;
 574                        size_t size = EXT2_XATTR_SIZE(
 575                                le32_to_cpu(here->e_value_size));
 576
 577                        if (size == EXT2_XATTR_SIZE(value_len)) {
 578                                /* The old and the new value have the same
 579                                   size. Just replace. */
 580                                here->e_value_size = cpu_to_le32(value_len);
 581                                memset(val + size - EXT2_XATTR_PAD, 0,
 582                                       EXT2_XATTR_PAD); /* Clear pad bytes. */
 583                                memcpy(val, value, value_len);
 584                                goto skip_replace;
 585                        }
 586
 587                        /* Remove the old value. */
 588                        memmove(first_val + size, first_val, val - first_val);
 589                        memset(first_val, 0, size);
 590                        here->e_value_offs = 0;
 591                        min_offs += size;
 592
 593                        /* Adjust all value offsets. */
 594                        last = ENTRY(header+1);
 595                        while (!IS_LAST_ENTRY(last)) {
 596                                size_t o = le16_to_cpu(last->e_value_offs);
 597                                if (o < offs)
 598                                        last->e_value_offs =
 599                                                cpu_to_le16(o + size);
 600                                last = EXT2_XATTR_NEXT(last);
 601                        }
 602                }
 603                if (value == NULL) {
 604                        /* Remove the old name. */
 605                        size_t size = EXT2_XATTR_LEN(name_len);
 606                        last = ENTRY((char *)last - size);
 607                        memmove(here, (char*)here + size,
 608                                (char*)last - (char*)here);
 609                        memset(last, 0, size);
 610                }
 611        }
 612
 613        if (value != NULL) {
 614                /* Insert the new value. */
 615                here->e_value_size = cpu_to_le32(value_len);
 616                if (value_len) {
 617                        size_t size = EXT2_XATTR_SIZE(value_len);
 618                        char *val = (char *)header + min_offs - size;
 619                        here->e_value_offs =
 620                                cpu_to_le16((char *)val - (char *)header);
 621                        memset(val + size - EXT2_XATTR_PAD, 0,
 622                               EXT2_XATTR_PAD); /* Clear the pad bytes. */
 623                        memcpy(val, value, value_len);
 624                }
 625        }
 626
 627skip_replace:
 628        if (IS_LAST_ENTRY(ENTRY(header+1))) {
 629                /* This block is now empty. */
 630                if (bh && header == HDR(bh))
 631                        unlock_buffer(bh);  /* we were modifying in-place. */
 632                error = ext2_xattr_set2(inode, bh, NULL);
 633        } else {
 634                ext2_xattr_rehash(header, here);
 635                if (bh && header == HDR(bh))
 636                        unlock_buffer(bh);  /* we were modifying in-place. */
 637                error = ext2_xattr_set2(inode, bh, header);
 638        }
 639
 640cleanup:
 641        if (!(bh && header == HDR(bh)))
 642                kfree(header);
 643        brelse(bh);
 644        up_write(&EXT2_I(inode)->xattr_sem);
 645
 646        return error;
 647}
 648
 649/*
 650 * Second half of ext2_xattr_set(): Update the file system.
 651 */
 652static int
 653ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 654                struct ext2_xattr_header *header)
 655{
 656        struct super_block *sb = inode->i_sb;
 657        struct buffer_head *new_bh = NULL;
 658        int error;
 659        struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 660
 661        if (header) {
 662                new_bh = ext2_xattr_cache_find(inode, header);
 663                if (new_bh) {
 664                        /* We found an identical block in the cache. */
 665                        if (new_bh == old_bh) {
 666                                ea_bdebug(new_bh, "keeping this block");
 667                        } else {
 668                                /* The old block is released after updating
 669                                   the inode.  */
 670                                ea_bdebug(new_bh, "reusing block");
 671
 672                                error = dquot_alloc_block(inode, 1);
 673                                if (error) {
 674                                        unlock_buffer(new_bh);
 675                                        goto cleanup;
 676                                }
 677                                le32_add_cpu(&HDR(new_bh)->h_refcount, 1);
 678                                ea_bdebug(new_bh, "refcount now=%d",
 679                                        le32_to_cpu(HDR(new_bh)->h_refcount));
 680                        }
 681                        unlock_buffer(new_bh);
 682                } else if (old_bh && header == HDR(old_bh)) {
 683                        /* Keep this block. No need to lock the block as we
 684                           don't need to change the reference count. */
 685                        new_bh = old_bh;
 686                        get_bh(new_bh);
 687                        ext2_xattr_cache_insert(ea_block_cache, new_bh);
 688                } else {
 689                        /* We need to allocate a new block */
 690                        ext2_fsblk_t goal = ext2_group_first_block_no(sb,
 691                                                EXT2_I(inode)->i_block_group);
 692                        int block = ext2_new_block(inode, goal, &error);
 693                        if (error)
 694                                goto cleanup;
 695                        ea_idebug(inode, "creating block %d", block);
 696
 697                        new_bh = sb_getblk(sb, block);
 698                        if (unlikely(!new_bh)) {
 699                                ext2_free_blocks(inode, block, 1);
 700                                mark_inode_dirty(inode);
 701                                error = -ENOMEM;
 702                                goto cleanup;
 703                        }
 704                        lock_buffer(new_bh);
 705                        memcpy(new_bh->b_data, header, new_bh->b_size);
 706                        set_buffer_uptodate(new_bh);
 707                        unlock_buffer(new_bh);
 708                        ext2_xattr_cache_insert(ea_block_cache, new_bh);
 709                        
 710                        ext2_xattr_update_super_block(sb);
 711                }
 712                mark_buffer_dirty(new_bh);
 713                if (IS_SYNC(inode)) {
 714                        sync_dirty_buffer(new_bh);
 715                        error = -EIO;
 716                        if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
 717                                goto cleanup;
 718                }
 719        }
 720
 721        /* Update the inode. */
 722        EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
 723        inode->i_ctime = current_time(inode);
 724        if (IS_SYNC(inode)) {
 725                error = sync_inode_metadata(inode, 1);
 726                /* In case sync failed due to ENOSPC the inode was actually
 727                 * written (only some dirty data were not) so we just proceed
 728                 * as if nothing happened and cleanup the unused block */
 729                if (error && error != -ENOSPC) {
 730                        if (new_bh && new_bh != old_bh) {
 731                                dquot_free_block_nodirty(inode, 1);
 732                                mark_inode_dirty(inode);
 733                        }
 734                        goto cleanup;
 735                }
 736        } else
 737                mark_inode_dirty(inode);
 738
 739        error = 0;
 740        if (old_bh && old_bh != new_bh) {
 741                /*
 742                 * If there was an old block and we are no longer using it,
 743                 * release the old block.
 744                 */
 745                lock_buffer(old_bh);
 746                if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
 747                        __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
 748
 749                        /*
 750                         * This must happen under buffer lock for
 751                         * ext2_xattr_set2() to reliably detect freed block
 752                         */
 753                        mb_cache_entry_delete(ea_block_cache, hash,
 754                                              old_bh->b_blocknr);
 755                        /* Free the old block. */
 756                        ea_bdebug(old_bh, "freeing");
 757                        ext2_free_blocks(inode, old_bh->b_blocknr, 1);
 758                        mark_inode_dirty(inode);
 759                        /* We let our caller release old_bh, so we
 760                         * need to duplicate the buffer before. */
 761                        get_bh(old_bh);
 762                        bforget(old_bh);
 763                } else {
 764                        /* Decrement the refcount only. */
 765                        le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
 766                        dquot_free_block_nodirty(inode, 1);
 767                        mark_inode_dirty(inode);
 768                        mark_buffer_dirty(old_bh);
 769                        ea_bdebug(old_bh, "refcount now=%d",
 770                                le32_to_cpu(HDR(old_bh)->h_refcount));
 771                }
 772                unlock_buffer(old_bh);
 773        }
 774
 775cleanup:
 776        brelse(new_bh);
 777
 778        return error;
 779}
 780
 781/*
 782 * ext2_xattr_delete_inode()
 783 *
 784 * Free extended attribute resources associated with this inode. This
 785 * is called immediately before an inode is freed.
 786 */
 787void
 788ext2_xattr_delete_inode(struct inode *inode)
 789{
 790        struct buffer_head *bh = NULL;
 791        struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb);
 792
 793        down_write(&EXT2_I(inode)->xattr_sem);
 794        if (!EXT2_I(inode)->i_file_acl)
 795                goto cleanup;
 796
 797        if (!ext2_data_block_valid(sbi, EXT2_I(inode)->i_file_acl, 0)) {
 798                ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
 799                        "inode %ld: xattr block %d is out of data blocks range",
 800                        inode->i_ino, EXT2_I(inode)->i_file_acl);
 801                goto cleanup;
 802        }
 803
 804        bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
 805        if (!bh) {
 806                ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
 807                        "inode %ld: block %d read error", inode->i_ino,
 808                        EXT2_I(inode)->i_file_acl);
 809                goto cleanup;
 810        }
 811        ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
 812        if (!ext2_xattr_header_valid(HDR(bh))) {
 813                ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
 814                        "inode %ld: bad block %d", inode->i_ino,
 815                        EXT2_I(inode)->i_file_acl);
 816                goto cleanup;
 817        }
 818        lock_buffer(bh);
 819        if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
 820                __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
 821
 822                /*
 823                 * This must happen under buffer lock for ext2_xattr_set2() to
 824                 * reliably detect freed block
 825                 */
 826                mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
 827                                      bh->b_blocknr);
 828                ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
 829                get_bh(bh);
 830                bforget(bh);
 831                unlock_buffer(bh);
 832        } else {
 833                le32_add_cpu(&HDR(bh)->h_refcount, -1);
 834                ea_bdebug(bh, "refcount now=%d",
 835                        le32_to_cpu(HDR(bh)->h_refcount));
 836                unlock_buffer(bh);
 837                mark_buffer_dirty(bh);
 838                if (IS_SYNC(inode))
 839                        sync_dirty_buffer(bh);
 840                dquot_free_block_nodirty(inode, 1);
 841        }
 842        EXT2_I(inode)->i_file_acl = 0;
 843
 844cleanup:
 845        brelse(bh);
 846        up_write(&EXT2_I(inode)->xattr_sem);
 847}
 848
 849/*
 850 * ext2_xattr_cache_insert()
 851 *
 852 * Create a new entry in the extended attribute cache, and insert
 853 * it unless such an entry is already in the cache.
 854 *
 855 * Returns 0, or a negative error number on failure.
 856 */
 857static int
 858ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
 859{
 860        __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
 861        int error;
 862
 863        error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr,
 864                                      true);
 865        if (error) {
 866                if (error == -EBUSY) {
 867                        ea_bdebug(bh, "already in cache (%d cache entries)",
 868                                atomic_read(&ext2_xattr_cache->c_entry_count));
 869                        error = 0;
 870                }
 871        } else
 872                ea_bdebug(bh, "inserting [%x]", (int)hash);
 873        return error;
 874}
 875
 876/*
 877 * ext2_xattr_cmp()
 878 *
 879 * Compare two extended attribute blocks for equality.
 880 *
 881 * Returns 0 if the blocks are equal, 1 if they differ, and
 882 * a negative error number on errors.
 883 */
 884static int
 885ext2_xattr_cmp(struct ext2_xattr_header *header1,
 886               struct ext2_xattr_header *header2)
 887{
 888        struct ext2_xattr_entry *entry1, *entry2;
 889
 890        entry1 = ENTRY(header1+1);
 891        entry2 = ENTRY(header2+1);
 892        while (!IS_LAST_ENTRY(entry1)) {
 893                if (IS_LAST_ENTRY(entry2))
 894                        return 1;
 895                if (entry1->e_hash != entry2->e_hash ||
 896                    entry1->e_name_index != entry2->e_name_index ||
 897                    entry1->e_name_len != entry2->e_name_len ||
 898                    entry1->e_value_size != entry2->e_value_size ||
 899                    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
 900                        return 1;
 901                if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
 902                        return -EIO;
 903                if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
 904                           (char *)header2 + le16_to_cpu(entry2->e_value_offs),
 905                           le32_to_cpu(entry1->e_value_size)))
 906                        return 1;
 907
 908                entry1 = EXT2_XATTR_NEXT(entry1);
 909                entry2 = EXT2_XATTR_NEXT(entry2);
 910        }
 911        if (!IS_LAST_ENTRY(entry2))
 912                return 1;
 913        return 0;
 914}
 915
 916/*
 917 * ext2_xattr_cache_find()
 918 *
 919 * Find an identical extended attribute block.
 920 *
 921 * Returns a locked buffer head to the block found, or NULL if such
 922 * a block was not found or an error occurred.
 923 */
 924static struct buffer_head *
 925ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
 926{
 927        __u32 hash = le32_to_cpu(header->h_hash);
 928        struct mb_cache_entry *ce;
 929        struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 930
 931        if (!header->h_hash)
 932                return NULL;  /* never share */
 933        ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 934again:
 935        ce = mb_cache_entry_find_first(ea_block_cache, hash);
 936        while (ce) {
 937                struct buffer_head *bh;
 938
 939                bh = sb_bread(inode->i_sb, ce->e_value);
 940                if (!bh) {
 941                        ext2_error(inode->i_sb, "ext2_xattr_cache_find",
 942                                "inode %ld: block %ld read error",
 943                                inode->i_ino, (unsigned long) ce->e_value);
 944                } else {
 945                        lock_buffer(bh);
 946                        /*
 947                         * We have to be careful about races with freeing or
 948                         * rehashing of xattr block. Once we hold buffer lock
 949                         * xattr block's state is stable so we can check
 950                         * whether the block got freed / rehashed or not.
 951                         * Since we unhash mbcache entry under buffer lock when
 952                         * freeing / rehashing xattr block, checking whether
 953                         * entry is still hashed is reliable.
 954                         */
 955                        if (hlist_bl_unhashed(&ce->e_hash_list)) {
 956                                mb_cache_entry_put(ea_block_cache, ce);
 957                                unlock_buffer(bh);
 958                                brelse(bh);
 959                                goto again;
 960                        } else if (le32_to_cpu(HDR(bh)->h_refcount) >
 961                                   EXT2_XATTR_REFCOUNT_MAX) {
 962                                ea_idebug(inode, "block %ld refcount %d>%d",
 963                                          (unsigned long) ce->e_value,
 964                                          le32_to_cpu(HDR(bh)->h_refcount),
 965                                          EXT2_XATTR_REFCOUNT_MAX);
 966                        } else if (!ext2_xattr_cmp(header, HDR(bh))) {
 967                                ea_bdebug(bh, "b_count=%d",
 968                                          atomic_read(&(bh->b_count)));
 969                                mb_cache_entry_touch(ea_block_cache, ce);
 970                                mb_cache_entry_put(ea_block_cache, ce);
 971                                return bh;
 972                        }
 973                        unlock_buffer(bh);
 974                        brelse(bh);
 975                }
 976                ce = mb_cache_entry_find_next(ea_block_cache, ce);
 977        }
 978        return NULL;
 979}
 980
 981#define NAME_HASH_SHIFT 5
 982#define VALUE_HASH_SHIFT 16
 983
 984/*
 985 * ext2_xattr_hash_entry()
 986 *
 987 * Compute the hash of an extended attribute.
 988 */
 989static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
 990                                         struct ext2_xattr_entry *entry)
 991{
 992        __u32 hash = 0;
 993        char *name = entry->e_name;
 994        int n;
 995
 996        for (n=0; n < entry->e_name_len; n++) {
 997                hash = (hash << NAME_HASH_SHIFT) ^
 998                       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
 999                       *name++;
1000        }
1001
1002        if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1003                __le32 *value = (__le32 *)((char *)header +
1004                        le16_to_cpu(entry->e_value_offs));
1005                for (n = (le32_to_cpu(entry->e_value_size) +
1006                     EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) {
1007                        hash = (hash << VALUE_HASH_SHIFT) ^
1008                               (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1009                               le32_to_cpu(*value++);
1010                }
1011        }
1012        entry->e_hash = cpu_to_le32(hash);
1013}
1014
1015#undef NAME_HASH_SHIFT
1016#undef VALUE_HASH_SHIFT
1017
1018#define BLOCK_HASH_SHIFT 16
1019
1020/*
1021 * ext2_xattr_rehash()
1022 *
1023 * Re-compute the extended attribute hash value after an entry has changed.
1024 */
1025static void ext2_xattr_rehash(struct ext2_xattr_header *header,
1026                              struct ext2_xattr_entry *entry)
1027{
1028        struct ext2_xattr_entry *here;
1029        __u32 hash = 0;
1030        
1031        ext2_xattr_hash_entry(header, entry);
1032        here = ENTRY(header+1);
1033        while (!IS_LAST_ENTRY(here)) {
1034                if (!here->e_hash) {
1035                        /* Block is not shared if an entry's hash value == 0 */
1036                        hash = 0;
1037                        break;
1038                }
1039                hash = (hash << BLOCK_HASH_SHIFT) ^
1040                       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1041                       le32_to_cpu(here->e_hash);
1042                here = EXT2_XATTR_NEXT(here);
1043        }
1044        header->h_hash = cpu_to_le32(hash);
1045}
1046
1047#undef BLOCK_HASH_SHIFT
1048
#define HASH_BUCKET_BITS 10

/*
 * ext2_xattr_create_cache()
 *
 * Create an extended attribute block cache by calling mb_cache_create()
 * with HASH_BUCKET_BITS, and return whatever it returns.
 */
struct mb_cache *ext2_xattr_create_cache(void)
{
        struct mb_cache *cache = mb_cache_create(HASH_BUCKET_BITS);

        return cache;
}
1055
/*
 * ext2_xattr_destroy_cache()
 *
 * Destroy an extended attribute block cache. A NULL @cache is accepted
 * and ignored.
 */
void ext2_xattr_destroy_cache(struct mb_cache *cache)
{
        if (!cache)
                return;

        mb_cache_destroy(cache);
}
1061