linux/fs/ceph/xattr.c
<<
>>
Prefs
   1#include <linux/ceph/ceph_debug.h>
   2
   3#include "super.h"
   4#include "mds_client.h"
   5
   6#include <linux/ceph/decode.h>
   7
   8#include <linux/xattr.h>
   9#include <linux/slab.h>
  10
  11#define XATTR_CEPH_PREFIX "ceph."
  12#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  13
  14static bool ceph_is_valid_xattr(const char *name)
  15{
  16        return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  17               !strncmp(name, XATTR_SECURITY_PREFIX,
  18                        XATTR_SECURITY_PREFIX_LEN) ||
  19               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  20               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  21}
  22
  23/*
  24 * These define virtual xattrs exposing the recursive directory
  25 * statistics and layout metadata.
  26 */
  27struct ceph_vxattr {
  28        char *name;
  29        size_t name_size;       /* strlen(name) + 1 (for '\0') */
  30        size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  31                              size_t size);
  32        bool readonly, hidden;
  33        bool (*exists_cb)(struct ceph_inode_info *ci);
  34};
  35
  36/* layouts */
  37
  38static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  39{
  40        size_t s;
  41        char *p = (char *)&ci->i_layout;
  42
  43        for (s = 0; s < sizeof(ci->i_layout); s++, p++)
  44                if (*p)
  45                        return true;
  46        return false;
  47}
  48
  49static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  50                                        size_t size)
  51{
  52        int ret;
  53        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  54        struct ceph_osd_client *osdc = &fsc->client->osdc;
  55        s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  56        const char *pool_name;
  57
  58        dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  59        down_read(&osdc->map_sem);
  60        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  61        if (pool_name)
  62                ret = snprintf(val, size,
  63                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
  64                (unsigned long long)ceph_file_layout_su(ci->i_layout),
  65                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  66                (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  67                pool_name);
  68        else
  69                ret = snprintf(val, size,
  70                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
  71                (unsigned long long)ceph_file_layout_su(ci->i_layout),
  72                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  73                (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  74                (unsigned long long)pool);
  75
  76        up_read(&osdc->map_sem);
  77        return ret;
  78}
  79
  80static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
  81                                               char *val, size_t size)
  82{
  83        return snprintf(val, size, "%lld",
  84                        (unsigned long long)ceph_file_layout_su(ci->i_layout));
  85}
  86
  87static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
  88                                                char *val, size_t size)
  89{
  90        return snprintf(val, size, "%lld",
  91               (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
  92}
  93
  94static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
  95                                               char *val, size_t size)
  96{
  97        return snprintf(val, size, "%lld",
  98               (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
  99}
 100
 101static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
 102                                        char *val, size_t size)
 103{
 104        int ret;
 105        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
 106        struct ceph_osd_client *osdc = &fsc->client->osdc;
 107        s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
 108        const char *pool_name;
 109
 110        down_read(&osdc->map_sem);
 111        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
 112        if (pool_name)
 113                ret = snprintf(val, size, "%s", pool_name);
 114        else
 115                ret = snprintf(val, size, "%lld", (unsigned long long)pool);
 116        up_read(&osdc->map_sem);
 117        return ret;
 118}
 119
 120/* directories */
 121
 122static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
 123                                        size_t size)
 124{
 125        return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
 126}
 127
 128static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
 129                                      size_t size)
 130{
 131        return snprintf(val, size, "%lld", ci->i_files);
 132}
 133
 134static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
 135                                        size_t size)
 136{
 137        return snprintf(val, size, "%lld", ci->i_subdirs);
 138}
 139
 140static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
 141                                         size_t size)
 142{
 143        return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
 144}
 145
 146static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
 147                                       size_t size)
 148{
 149        return snprintf(val, size, "%lld", ci->i_rfiles);
 150}
 151
 152static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
 153                                         size_t size)
 154{
 155        return snprintf(val, size, "%lld", ci->i_rsubdirs);
 156}
 157
 158static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
 159                                       size_t size)
 160{
 161        return snprintf(val, size, "%lld", ci->i_rbytes);
 162}
 163
 164static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 165                                       size_t size)
 166{
 167        return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
 168                        (long)ci->i_rctime.tv_nsec);
 169}
 170
 171
 172#define CEPH_XATTR_NAME(_type, _name)   XATTR_CEPH_PREFIX #_type "." #_name
 173#define CEPH_XATTR_NAME2(_type, _name, _name2)  \
 174        XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
 175
 176#define XATTR_NAME_CEPH(_type, _name)                                   \
 177        {                                                               \
 178                .name = CEPH_XATTR_NAME(_type, _name),                  \
 179                .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
 180                .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
 181                .readonly = true,                               \
 182                .hidden = false,                                \
 183                .exists_cb = NULL,                      \
 184        }
 185#define XATTR_LAYOUT_FIELD(_type, _name, _field)                        \
 186        {                                                               \
 187                .name = CEPH_XATTR_NAME2(_type, _name, _field), \
 188                .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
 189                .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
 190                .readonly = false,                              \
 191                .hidden = true,                 \
 192                .exists_cb = ceph_vxattrcb_layout_exists,       \
 193        }
 194
 195static struct ceph_vxattr ceph_dir_vxattrs[] = {
 196        {
 197                .name = "ceph.dir.layout",
 198                .name_size = sizeof("ceph.dir.layout"),
 199                .getxattr_cb = ceph_vxattrcb_layout,
 200                .readonly = false,
 201                .hidden = false,
 202                .exists_cb = ceph_vxattrcb_layout_exists,
 203        },
 204        XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
 205        XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
 206        XATTR_LAYOUT_FIELD(dir, layout, object_size),
 207        XATTR_LAYOUT_FIELD(dir, layout, pool),
 208        XATTR_NAME_CEPH(dir, entries),
 209        XATTR_NAME_CEPH(dir, files),
 210        XATTR_NAME_CEPH(dir, subdirs),
 211        XATTR_NAME_CEPH(dir, rentries),
 212        XATTR_NAME_CEPH(dir, rfiles),
 213        XATTR_NAME_CEPH(dir, rsubdirs),
 214        XATTR_NAME_CEPH(dir, rbytes),
 215        XATTR_NAME_CEPH(dir, rctime),
 216        { .name = NULL, 0 }     /* Required table terminator */
 217};
 218static size_t ceph_dir_vxattrs_name_size;       /* total size of all names */
 219
 220/* files */
 221
 222static struct ceph_vxattr ceph_file_vxattrs[] = {
 223        {
 224                .name = "ceph.file.layout",
 225                .name_size = sizeof("ceph.file.layout"),
 226                .getxattr_cb = ceph_vxattrcb_layout,
 227                .readonly = false,
 228                .hidden = false,
 229                .exists_cb = ceph_vxattrcb_layout_exists,
 230        },
 231        XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
 232        XATTR_LAYOUT_FIELD(file, layout, stripe_count),
 233        XATTR_LAYOUT_FIELD(file, layout, object_size),
 234        XATTR_LAYOUT_FIELD(file, layout, pool),
 235        { .name = NULL, 0 }     /* Required table terminator */
 236};
 237static size_t ceph_file_vxattrs_name_size;      /* total size of all names */
 238
 239static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
 240{
 241        if (S_ISDIR(inode->i_mode))
 242                return ceph_dir_vxattrs;
 243        else if (S_ISREG(inode->i_mode))
 244                return ceph_file_vxattrs;
 245        return NULL;
 246}
 247
 248static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
 249{
 250        if (vxattrs == ceph_dir_vxattrs)
 251                return ceph_dir_vxattrs_name_size;
 252        if (vxattrs == ceph_file_vxattrs)
 253                return ceph_file_vxattrs_name_size;
 254        BUG();
 255
 256        return 0;
 257}
 258
 259/*
 260 * Compute the aggregate size (including terminating '\0') of all
 261 * virtual extended attribute names in the given vxattr table.
 262 */
 263static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
 264{
 265        struct ceph_vxattr *vxattr;
 266        size_t size = 0;
 267
 268        for (vxattr = vxattrs; vxattr->name; vxattr++)
 269                if (!vxattr->hidden)
 270                        size += vxattr->name_size;
 271
 272        return size;
 273}
 274
 275/* Routines called at initialization and exit time */
 276
 277void __init ceph_xattr_init(void)
 278{
 279        ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
 280        ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
 281}
 282
 283void ceph_xattr_exit(void)
 284{
 285        ceph_dir_vxattrs_name_size = 0;
 286        ceph_file_vxattrs_name_size = 0;
 287}
 288
 289static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
 290                                                const char *name)
 291{
 292        struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
 293
 294        if (vxattr) {
 295                while (vxattr->name) {
 296                        if (!strcmp(vxattr->name, name))
 297                                return vxattr;
 298                        vxattr++;
 299                }
 300        }
 301
 302        return NULL;
 303}
 304
 305static int __set_xattr(struct ceph_inode_info *ci,
 306                           const char *name, int name_len,
 307                           const char *val, int val_len,
 308                           int dirty,
 309                           int should_free_name, int should_free_val,
 310                           struct ceph_inode_xattr **newxattr)
 311{
 312        struct rb_node **p;
 313        struct rb_node *parent = NULL;
 314        struct ceph_inode_xattr *xattr = NULL;
 315        int c;
 316        int new = 0;
 317
 318        p = &ci->i_xattrs.index.rb_node;
 319        while (*p) {
 320                parent = *p;
 321                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 322                c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
 323                if (c < 0)
 324                        p = &(*p)->rb_left;
 325                else if (c > 0)
 326                        p = &(*p)->rb_right;
 327                else {
 328                        if (name_len == xattr->name_len)
 329                                break;
 330                        else if (name_len < xattr->name_len)
 331                                p = &(*p)->rb_left;
 332                        else
 333                                p = &(*p)->rb_right;
 334                }
 335                xattr = NULL;
 336        }
 337
 338        if (!xattr) {
 339                new = 1;
 340                xattr = *newxattr;
 341                xattr->name = name;
 342                xattr->name_len = name_len;
 343                xattr->should_free_name = should_free_name;
 344
 345                ci->i_xattrs.count++;
 346                dout("__set_xattr count=%d\n", ci->i_xattrs.count);
 347        } else {
 348                kfree(*newxattr);
 349                *newxattr = NULL;
 350                if (xattr->should_free_val)
 351                        kfree((void *)xattr->val);
 352
 353                if (should_free_name) {
 354                        kfree((void *)name);
 355                        name = xattr->name;
 356                }
 357                ci->i_xattrs.names_size -= xattr->name_len;
 358                ci->i_xattrs.vals_size -= xattr->val_len;
 359        }
 360        ci->i_xattrs.names_size += name_len;
 361        ci->i_xattrs.vals_size += val_len;
 362        if (val)
 363                xattr->val = val;
 364        else
 365                xattr->val = "";
 366
 367        xattr->val_len = val_len;
 368        xattr->dirty = dirty;
 369        xattr->should_free_val = (val && should_free_val);
 370
 371        if (new) {
 372                rb_link_node(&xattr->node, parent, p);
 373                rb_insert_color(&xattr->node, &ci->i_xattrs.index);
 374                dout("__set_xattr_val p=%p\n", p);
 375        }
 376
 377        dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
 378             ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
 379
 380        return 0;
 381}
 382
 383static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
 384                           const char *name)
 385{
 386        struct rb_node **p;
 387        struct rb_node *parent = NULL;
 388        struct ceph_inode_xattr *xattr = NULL;
 389        int name_len = strlen(name);
 390        int c;
 391
 392        p = &ci->i_xattrs.index.rb_node;
 393        while (*p) {
 394                parent = *p;
 395                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 396                c = strncmp(name, xattr->name, xattr->name_len);
 397                if (c == 0 && name_len > xattr->name_len)
 398                        c = 1;
 399                if (c < 0)
 400                        p = &(*p)->rb_left;
 401                else if (c > 0)
 402                        p = &(*p)->rb_right;
 403                else {
 404                        dout("__get_xattr %s: found %.*s\n", name,
 405                             xattr->val_len, xattr->val);
 406                        return xattr;
 407                }
 408        }
 409
 410        dout("__get_xattr %s: not found\n", name);
 411
 412        return NULL;
 413}
 414
 415static void __free_xattr(struct ceph_inode_xattr *xattr)
 416{
 417        BUG_ON(!xattr);
 418
 419        if (xattr->should_free_name)
 420                kfree((void *)xattr->name);
 421        if (xattr->should_free_val)
 422                kfree((void *)xattr->val);
 423
 424        kfree(xattr);
 425}
 426
 427static int __remove_xattr(struct ceph_inode_info *ci,
 428                          struct ceph_inode_xattr *xattr)
 429{
 430        if (!xattr)
 431                return -EOPNOTSUPP;
 432
 433        rb_erase(&xattr->node, &ci->i_xattrs.index);
 434
 435        if (xattr->should_free_name)
 436                kfree((void *)xattr->name);
 437        if (xattr->should_free_val)
 438                kfree((void *)xattr->val);
 439
 440        ci->i_xattrs.names_size -= xattr->name_len;
 441        ci->i_xattrs.vals_size -= xattr->val_len;
 442        ci->i_xattrs.count--;
 443        kfree(xattr);
 444
 445        return 0;
 446}
 447
 448static int __remove_xattr_by_name(struct ceph_inode_info *ci,
 449                           const char *name)
 450{
 451        struct rb_node **p;
 452        struct ceph_inode_xattr *xattr;
 453        int err;
 454
 455        p = &ci->i_xattrs.index.rb_node;
 456        xattr = __get_xattr(ci, name);
 457        err = __remove_xattr(ci, xattr);
 458        return err;
 459}
 460
 461static char *__copy_xattr_names(struct ceph_inode_info *ci,
 462                                char *dest)
 463{
 464        struct rb_node *p;
 465        struct ceph_inode_xattr *xattr = NULL;
 466
 467        p = rb_first(&ci->i_xattrs.index);
 468        dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
 469
 470        while (p) {
 471                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 472                memcpy(dest, xattr->name, xattr->name_len);
 473                dest[xattr->name_len] = '\0';
 474
 475                dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
 476                     xattr->name_len, ci->i_xattrs.names_size);
 477
 478                dest += xattr->name_len + 1;
 479                p = rb_next(p);
 480        }
 481
 482        return dest;
 483}
 484
 485void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 486{
 487        struct rb_node *p, *tmp;
 488        struct ceph_inode_xattr *xattr = NULL;
 489
 490        p = rb_first(&ci->i_xattrs.index);
 491
 492        dout("__ceph_destroy_xattrs p=%p\n", p);
 493
 494        while (p) {
 495                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 496                tmp = p;
 497                p = rb_next(tmp);
 498                dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
 499                     xattr->name_len, xattr->name);
 500                rb_erase(tmp, &ci->i_xattrs.index);
 501
 502                __free_xattr(xattr);
 503        }
 504
 505        ci->i_xattrs.names_size = 0;
 506        ci->i_xattrs.vals_size = 0;
 507        ci->i_xattrs.index_version = 0;
 508        ci->i_xattrs.count = 0;
 509        ci->i_xattrs.index = RB_ROOT;
 510}
 511
 512static int __build_xattrs(struct inode *inode)
 513        __releases(ci->i_ceph_lock)
 514        __acquires(ci->i_ceph_lock)
 515{
 516        u32 namelen;
 517        u32 numattr = 0;
 518        void *p, *end;
 519        u32 len;
 520        const char *name, *val;
 521        struct ceph_inode_info *ci = ceph_inode(inode);
 522        int xattr_version;
 523        struct ceph_inode_xattr **xattrs = NULL;
 524        int err = 0;
 525        int i;
 526
 527        dout("__build_xattrs() len=%d\n",
 528             ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
 529
 530        if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
 531                return 0; /* already built */
 532
 533        __ceph_destroy_xattrs(ci);
 534
 535start:
 536        /* updated internal xattr rb tree */
 537        if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
 538                p = ci->i_xattrs.blob->vec.iov_base;
 539                end = p + ci->i_xattrs.blob->vec.iov_len;
 540                ceph_decode_32_safe(&p, end, numattr, bad);
 541                xattr_version = ci->i_xattrs.version;
 542                spin_unlock(&ci->i_ceph_lock);
 543
 544                xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
 545                                 GFP_NOFS);
 546                err = -ENOMEM;
 547                if (!xattrs)
 548                        goto bad_lock;
 549                memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
 550                for (i = 0; i < numattr; i++) {
 551                        xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
 552                                            GFP_NOFS);
 553                        if (!xattrs[i])
 554                                goto bad_lock;
 555                }
 556
 557                spin_lock(&ci->i_ceph_lock);
 558                if (ci->i_xattrs.version != xattr_version) {
 559                        /* lost a race, retry */
 560                        for (i = 0; i < numattr; i++)
 561                                kfree(xattrs[i]);
 562                        kfree(xattrs);
 563                        xattrs = NULL;
 564                        goto start;
 565                }
 566                err = -EIO;
 567                while (numattr--) {
 568                        ceph_decode_32_safe(&p, end, len, bad);
 569                        namelen = len;
 570                        name = p;
 571                        p += len;
 572                        ceph_decode_32_safe(&p, end, len, bad);
 573                        val = p;
 574                        p += len;
 575
 576                        err = __set_xattr(ci, name, namelen, val, len,
 577                                          0, 0, 0, &xattrs[numattr]);
 578
 579                        if (err < 0)
 580                                goto bad;
 581                }
 582                kfree(xattrs);
 583        }
 584        ci->i_xattrs.index_version = ci->i_xattrs.version;
 585        ci->i_xattrs.dirty = false;
 586
 587        return err;
 588bad_lock:
 589        spin_lock(&ci->i_ceph_lock);
 590bad:
 591        if (xattrs) {
 592                for (i = 0; i < numattr; i++)
 593                        kfree(xattrs[i]);
 594                kfree(xattrs);
 595        }
 596        ci->i_xattrs.names_size = 0;
 597        return err;
 598}
 599
 600static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
 601                                    int val_size)
 602{
 603        /*
 604         * 4 bytes for the length, and additional 4 bytes per each xattr name,
 605         * 4 bytes per each value
 606         */
 607        int size = 4 + ci->i_xattrs.count*(4 + 4) +
 608                             ci->i_xattrs.names_size +
 609                             ci->i_xattrs.vals_size;
 610        dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
 611             ci->i_xattrs.count, ci->i_xattrs.names_size,
 612             ci->i_xattrs.vals_size);
 613
 614        if (name_size)
 615                size += 4 + 4 + name_size + val_size;
 616
 617        return size;
 618}
 619
 620/*
 621 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
 622 * and swap into place.
 623 */
 624void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
 625{
 626        struct rb_node *p;
 627        struct ceph_inode_xattr *xattr = NULL;
 628        void *dest;
 629
 630        dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
 631        if (ci->i_xattrs.dirty) {
 632                int need = __get_required_blob_size(ci, 0, 0);
 633
 634                BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
 635
 636                p = rb_first(&ci->i_xattrs.index);
 637                dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
 638
 639                ceph_encode_32(&dest, ci->i_xattrs.count);
 640                while (p) {
 641                        xattr = rb_entry(p, struct ceph_inode_xattr, node);
 642
 643                        ceph_encode_32(&dest, xattr->name_len);
 644                        memcpy(dest, xattr->name, xattr->name_len);
 645                        dest += xattr->name_len;
 646                        ceph_encode_32(&dest, xattr->val_len);
 647                        memcpy(dest, xattr->val, xattr->val_len);
 648                        dest += xattr->val_len;
 649
 650                        p = rb_next(p);
 651                }
 652
 653                /* adjust buffer len; it may be larger than we need */
 654                ci->i_xattrs.prealloc_blob->vec.iov_len =
 655                        dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
 656
 657                if (ci->i_xattrs.blob)
 658                        ceph_buffer_put(ci->i_xattrs.blob);
 659                ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
 660                ci->i_xattrs.prealloc_blob = NULL;
 661                ci->i_xattrs.dirty = false;
 662                ci->i_xattrs.version++;
 663        }
 664}
 665
 666ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
 667                      size_t size)
 668{
 669        struct inode *inode = dentry->d_inode;
 670        struct ceph_inode_info *ci = ceph_inode(inode);
 671        int err;
 672        struct ceph_inode_xattr *xattr;
 673        struct ceph_vxattr *vxattr = NULL;
 674
 675        if (!ceph_is_valid_xattr(name))
 676                return -ENODATA;
 677
 678
 679        /* let's see if a virtual xattr was requested */
 680        vxattr = ceph_match_vxattr(inode, name);
 681        if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
 682                err = vxattr->getxattr_cb(ci, value, size);
 683                return err;
 684        }
 685
 686        spin_lock(&ci->i_ceph_lock);
 687        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
 688             ci->i_xattrs.version, ci->i_xattrs.index_version);
 689
 690        if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
 691            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
 692                goto get_xattr;
 693        } else {
 694                spin_unlock(&ci->i_ceph_lock);
 695                /* get xattrs from mds (if we don't already have them) */
 696                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
 697                if (err)
 698                        return err;
 699        }
 700
 701        spin_lock(&ci->i_ceph_lock);
 702
 703        err = __build_xattrs(inode);
 704        if (err < 0)
 705                goto out;
 706
 707get_xattr:
 708        err = -ENODATA;  /* == ENOATTR */
 709        xattr = __get_xattr(ci, name);
 710        if (!xattr)
 711                goto out;
 712
 713        err = -ERANGE;
 714        if (size && size < xattr->val_len)
 715                goto out;
 716
 717        err = xattr->val_len;
 718        if (size == 0)
 719                goto out;
 720
 721        memcpy(value, xattr->val, xattr->val_len);
 722
 723out:
 724        spin_unlock(&ci->i_ceph_lock);
 725        return err;
 726}
 727
 728ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 729{
 730        struct inode *inode = dentry->d_inode;
 731        struct ceph_inode_info *ci = ceph_inode(inode);
 732        struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
 733        u32 vir_namelen = 0;
 734        u32 namelen;
 735        int err;
 736        u32 len;
 737        int i;
 738
 739        spin_lock(&ci->i_ceph_lock);
 740        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
 741             ci->i_xattrs.version, ci->i_xattrs.index_version);
 742
 743        if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
 744            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
 745                goto list_xattr;
 746        } else {
 747                spin_unlock(&ci->i_ceph_lock);
 748                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
 749                if (err)
 750                        return err;
 751        }
 752
 753        spin_lock(&ci->i_ceph_lock);
 754
 755        err = __build_xattrs(inode);
 756        if (err < 0)
 757                goto out;
 758
 759list_xattr:
 760        /*
 761         * Start with virtual dir xattr names (if any) (including
 762         * terminating '\0' characters for each).
 763         */
 764        vir_namelen = ceph_vxattrs_name_size(vxattrs);
 765
 766        /* adding 1 byte per each variable due to the null termination */
 767        namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
 768        err = -ERANGE;
 769        if (size && vir_namelen + namelen > size)
 770                goto out;
 771
 772        err = namelen + vir_namelen;
 773        if (size == 0)
 774                goto out;
 775
 776        names = __copy_xattr_names(ci, names);
 777
 778        /* virtual xattr names, too */
 779        err = namelen;
 780        if (vxattrs) {
 781                for (i = 0; vxattrs[i].name; i++) {
 782                        if (!vxattrs[i].hidden &&
 783                            !(vxattrs[i].exists_cb &&
 784                              !vxattrs[i].exists_cb(ci))) {
 785                                len = sprintf(names, "%s", vxattrs[i].name);
 786                                names += len + 1;
 787                                err += len + 1;
 788                        }
 789                }
 790        }
 791
 792out:
 793        spin_unlock(&ci->i_ceph_lock);
 794        return err;
 795}
 796
 797static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 798                              const char *value, size_t size, int flags)
 799{
 800        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 801        struct inode *inode = dentry->d_inode;
 802        struct ceph_inode_info *ci = ceph_inode(inode);
 803        struct inode *parent_inode;
 804        struct ceph_mds_request *req;
 805        struct ceph_mds_client *mdsc = fsc->mdsc;
 806        int err;
 807        int i, nr_pages;
 808        struct page **pages = NULL;
 809        void *kaddr;
 810
 811        /* copy value into some pages */
 812        nr_pages = calc_pages_for(0, size);
 813        if (nr_pages) {
 814                pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
 815                if (!pages)
 816                        return -ENOMEM;
 817                err = -ENOMEM;
 818                for (i = 0; i < nr_pages; i++) {
 819                        pages[i] = __page_cache_alloc(GFP_NOFS);
 820                        if (!pages[i]) {
 821                                nr_pages = i;
 822                                goto out;
 823                        }
 824                        kaddr = kmap(pages[i]);
 825                        memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
 826                               min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
 827                }
 828        }
 829
 830        dout("setxattr value=%.*s\n", (int)size, value);
 831
 832        /* do request */
 833        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
 834                                       USE_AUTH_MDS);
 835        if (IS_ERR(req)) {
 836                err = PTR_ERR(req);
 837                goto out;
 838        }
 839        req->r_inode = inode;
 840        ihold(inode);
 841        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 842        req->r_num_caps = 1;
 843        req->r_args.setxattr.flags = cpu_to_le32(flags);
 844        req->r_path2 = kstrdup(name, GFP_NOFS);
 845
 846        req->r_pages = pages;
 847        req->r_num_pages = nr_pages;
 848        req->r_data_len = size;
 849
 850        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
 851        parent_inode = ceph_get_dentry_parent_inode(dentry);
 852        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
 853        iput(parent_inode);
 854        ceph_mdsc_put_request(req);
 855        dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
 856
 857out:
 858        if (pages) {
 859                for (i = 0; i < nr_pages; i++)
 860                        __free_page(pages[i]);
 861                kfree(pages);
 862        }
 863        return err;
 864}
 865
 866int ceph_setxattr(struct dentry *dentry, const char *name,
 867                  const void *value, size_t size, int flags)
 868{
 869        struct inode *inode = dentry->d_inode;
 870        struct ceph_vxattr *vxattr;
 871        struct ceph_inode_info *ci = ceph_inode(inode);
 872        int issued;
 873        int err;
 874        int dirty;
 875        int name_len = strlen(name);
 876        int val_len = size;
 877        char *newname = NULL;
 878        char *newval = NULL;
 879        struct ceph_inode_xattr *xattr = NULL;
 880        int required_blob_size;
 881
 882        if (ceph_snap(inode) != CEPH_NOSNAP)
 883                return -EROFS;
 884
 885        if (!ceph_is_valid_xattr(name))
 886                return -EOPNOTSUPP;
 887
 888        vxattr = ceph_match_vxattr(inode, name);
 889        if (vxattr && vxattr->readonly)
 890                return -EOPNOTSUPP;
 891
 892        /* pass any unhandled ceph.* xattrs through to the MDS */
 893        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
 894                goto do_sync_unlocked;
 895
 896        /* preallocate memory for xattr name, value, index node */
 897        err = -ENOMEM;
 898        newname = kmemdup(name, name_len + 1, GFP_NOFS);
 899        if (!newname)
 900                goto out;
 901
 902        if (val_len) {
 903                newval = kmemdup(value, val_len, GFP_NOFS);
 904                if (!newval)
 905                        goto out;
 906        }
 907
 908        xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
 909        if (!xattr)
 910                goto out;
 911
 912        spin_lock(&ci->i_ceph_lock);
 913retry:
 914        issued = __ceph_caps_issued(ci, NULL);
 915        dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
 916        if (!(issued & CEPH_CAP_XATTR_EXCL))
 917                goto do_sync;
 918        __build_xattrs(inode);
 919
 920        required_blob_size = __get_required_blob_size(ci, name_len, val_len);
 921
 922        if (!ci->i_xattrs.prealloc_blob ||
 923            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
 924                struct ceph_buffer *blob;
 925
 926                spin_unlock(&ci->i_ceph_lock);
 927                dout(" preaallocating new blob size=%d\n", required_blob_size);
 928                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
 929                if (!blob)
 930                        goto out;
 931                spin_lock(&ci->i_ceph_lock);
 932                if (ci->i_xattrs.prealloc_blob)
 933                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
 934                ci->i_xattrs.prealloc_blob = blob;
 935                goto retry;
 936        }
 937
 938        err = __set_xattr(ci, newname, name_len, newval,
 939                          val_len, 1, 1, 1, &xattr);
 940
 941        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
 942        ci->i_xattrs.dirty = true;
 943        inode->i_ctime = CURRENT_TIME;
 944
 945        spin_unlock(&ci->i_ceph_lock);
 946        if (dirty)
 947                __mark_inode_dirty(inode, dirty);
 948        return err;
 949
 950do_sync:
 951        spin_unlock(&ci->i_ceph_lock);
 952do_sync_unlocked:
 953        err = ceph_sync_setxattr(dentry, name, value, size, flags);
 954out:
 955        kfree(newname);
 956        kfree(newval);
 957        kfree(xattr);
 958        return err;
 959}
 960
 961static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 962{
 963        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 964        struct ceph_mds_client *mdsc = fsc->mdsc;
 965        struct inode *inode = dentry->d_inode;
 966        struct inode *parent_inode;
 967        struct ceph_mds_request *req;
 968        int err;
 969
 970        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
 971                                       USE_AUTH_MDS);
 972        if (IS_ERR(req))
 973                return PTR_ERR(req);
 974        req->r_inode = inode;
 975        ihold(inode);
 976        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 977        req->r_num_caps = 1;
 978        req->r_path2 = kstrdup(name, GFP_NOFS);
 979
 980        parent_inode = ceph_get_dentry_parent_inode(dentry);
 981        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
 982        iput(parent_inode);
 983        ceph_mdsc_put_request(req);
 984        return err;
 985}
 986
 987int ceph_removexattr(struct dentry *dentry, const char *name)
 988{
 989        struct inode *inode = dentry->d_inode;
 990        struct ceph_vxattr *vxattr;
 991        struct ceph_inode_info *ci = ceph_inode(inode);
 992        int issued;
 993        int err;
 994        int required_blob_size;
 995        int dirty;
 996
 997        if (ceph_snap(inode) != CEPH_NOSNAP)
 998                return -EROFS;
 999
1000        if (!ceph_is_valid_xattr(name))
1001                return -EOPNOTSUPP;
1002
1003        vxattr = ceph_match_vxattr(inode, name);
1004        if (vxattr && vxattr->readonly)
1005                return -EOPNOTSUPP;
1006
1007        /* pass any unhandled ceph.* xattrs through to the MDS */
1008        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1009                goto do_sync_unlocked;
1010
1011        err = -ENOMEM;
1012        spin_lock(&ci->i_ceph_lock);
1013retry:
1014        issued = __ceph_caps_issued(ci, NULL);
1015        dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
1016
1017        if (!(issued & CEPH_CAP_XATTR_EXCL))
1018                goto do_sync;
1019        __build_xattrs(inode);
1020
1021        required_blob_size = __get_required_blob_size(ci, 0, 0);
1022
1023        if (!ci->i_xattrs.prealloc_blob ||
1024            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1025                struct ceph_buffer *blob;
1026
1027                spin_unlock(&ci->i_ceph_lock);
1028                dout(" preaallocating new blob size=%d\n", required_blob_size);
1029                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1030                if (!blob)
1031                        goto out;
1032                spin_lock(&ci->i_ceph_lock);
1033                if (ci->i_xattrs.prealloc_blob)
1034                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1035                ci->i_xattrs.prealloc_blob = blob;
1036                goto retry;
1037        }
1038
1039        err = __remove_xattr_by_name(ceph_inode(inode), name);
1040
1041        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
1042        ci->i_xattrs.dirty = true;
1043        inode->i_ctime = CURRENT_TIME;
1044        spin_unlock(&ci->i_ceph_lock);
1045        if (dirty)
1046                __mark_inode_dirty(inode, dirty);
1047        return err;
1048do_sync:
1049        spin_unlock(&ci->i_ceph_lock);
1050do_sync_unlocked:
1051        err = ceph_send_removexattr(dentry, name);
1052out:
1053        return err;
1054}
1055
1056