/* linux/fs/ceph/xattr.c */
   1#include <linux/ceph/ceph_debug.h>
   2#include <linux/ceph/pagelist.h>
   3
   4#include "super.h"
   5#include "mds_client.h"
   6
   7#include <linux/ceph/decode.h>
   8
   9#include <linux/xattr.h>
  10#include <linux/posix_acl_xattr.h>
  11#include <linux/slab.h>
  12
  13#define XATTR_CEPH_PREFIX "ceph."
  14#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  15
  16static int __remove_xattr(struct ceph_inode_info *ci,
  17                          struct ceph_inode_xattr *xattr);
  18
  19/*
  20 * List of handlers for synthetic system.* attributes. Other
  21 * attributes are handled directly.
  22 */
  23const struct xattr_handler *ceph_xattr_handlers[] = {
  24#ifdef CONFIG_CEPH_FS_POSIX_ACL
  25        &posix_acl_access_xattr_handler,
  26        &posix_acl_default_xattr_handler,
  27#endif
  28        NULL,
  29};
  30
  31static bool ceph_is_valid_xattr(const char *name)
  32{
  33        return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  34               !strncmp(name, XATTR_SECURITY_PREFIX,
  35                        XATTR_SECURITY_PREFIX_LEN) ||
  36               !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
  37               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  38               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  39}
  40
  41/*
  42 * These define virtual xattrs exposing the recursive directory
  43 * statistics and layout metadata.
  44 */
  45struct ceph_vxattr {
  46        char *name;
  47        size_t name_size;       /* strlen(name) + 1 (for '\0') */
  48        size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  49                              size_t size);
  50        bool readonly, hidden;
  51        bool (*exists_cb)(struct ceph_inode_info *ci);
  52};
  53
  54/* layouts */
  55
  56static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  57{
  58        size_t s;
  59        char *p = (char *)&ci->i_layout;
  60
  61        for (s = 0; s < sizeof(ci->i_layout); s++, p++)
  62                if (*p)
  63                        return true;
  64        return false;
  65}
  66
  67static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  68                                   size_t size)
  69{
  70        int ret;
  71        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  72        struct ceph_osd_client *osdc = &fsc->client->osdc;
  73        s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  74        const char *pool_name;
  75        char buf[128];
  76
  77        dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  78        down_read(&osdc->map_sem);
  79        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  80        if (pool_name) {
  81                size_t len = strlen(pool_name);
  82                ret = snprintf(buf, sizeof(buf),
  83                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
  84                (unsigned long long)ceph_file_layout_su(ci->i_layout),
  85                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  86                (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
  87                if (!size) {
  88                        ret += len;
  89                } else if (ret + len > size) {
  90                        ret = -ERANGE;
  91                } else {
  92                        memcpy(val, buf, ret);
  93                        memcpy(val + ret, pool_name, len);
  94                        ret += len;
  95                }
  96        } else {
  97                ret = snprintf(buf, sizeof(buf),
  98                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
  99                (unsigned long long)ceph_file_layout_su(ci->i_layout),
 100                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
 101                (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
 102                (unsigned long long)pool);
 103                if (size) {
 104                        if (ret <= size)
 105                                memcpy(val, buf, ret);
 106                        else
 107                                ret = -ERANGE;
 108                }
 109        }
 110        up_read(&osdc->map_sem);
 111        return ret;
 112}
 113
 114static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
 115                                               char *val, size_t size)
 116{
 117        return snprintf(val, size, "%lld",
 118                        (unsigned long long)ceph_file_layout_su(ci->i_layout));
 119}
 120
 121static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
 122                                                char *val, size_t size)
 123{
 124        return snprintf(val, size, "%lld",
 125               (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
 126}
 127
 128static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
 129                                               char *val, size_t size)
 130{
 131        return snprintf(val, size, "%lld",
 132               (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
 133}
 134
 135static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
 136                                        char *val, size_t size)
 137{
 138        int ret;
 139        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
 140        struct ceph_osd_client *osdc = &fsc->client->osdc;
 141        s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
 142        const char *pool_name;
 143
 144        down_read(&osdc->map_sem);
 145        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
 146        if (pool_name)
 147                ret = snprintf(val, size, "%s", pool_name);
 148        else
 149                ret = snprintf(val, size, "%lld", (unsigned long long)pool);
 150        up_read(&osdc->map_sem);
 151        return ret;
 152}
 153
 154/* directories */
 155
 156static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
 157                                        size_t size)
 158{
 159        return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
 160}
 161
 162static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
 163                                      size_t size)
 164{
 165        return snprintf(val, size, "%lld", ci->i_files);
 166}
 167
 168static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
 169                                        size_t size)
 170{
 171        return snprintf(val, size, "%lld", ci->i_subdirs);
 172}
 173
 174static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
 175                                         size_t size)
 176{
 177        return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
 178}
 179
 180static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
 181                                       size_t size)
 182{
 183        return snprintf(val, size, "%lld", ci->i_rfiles);
 184}
 185
 186static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
 187                                         size_t size)
 188{
 189        return snprintf(val, size, "%lld", ci->i_rsubdirs);
 190}
 191
 192static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
 193                                       size_t size)
 194{
 195        return snprintf(val, size, "%lld", ci->i_rbytes);
 196}
 197
 198static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 199                                       size_t size)
 200{
 201        return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
 202                        (long)ci->i_rctime.tv_nsec);
 203}
 204
 205
 206#define CEPH_XATTR_NAME(_type, _name)   XATTR_CEPH_PREFIX #_type "." #_name
 207#define CEPH_XATTR_NAME2(_type, _name, _name2)  \
 208        XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
 209
 210#define XATTR_NAME_CEPH(_type, _name)                                   \
 211        {                                                               \
 212                .name = CEPH_XATTR_NAME(_type, _name),                  \
 213                .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
 214                .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
 215                .readonly = true,                               \
 216                .hidden = false,                                \
 217                .exists_cb = NULL,                      \
 218        }
 219#define XATTR_LAYOUT_FIELD(_type, _name, _field)                        \
 220        {                                                               \
 221                .name = CEPH_XATTR_NAME2(_type, _name, _field), \
 222                .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
 223                .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
 224                .readonly = false,                              \
 225                .hidden = true,                 \
 226                .exists_cb = ceph_vxattrcb_layout_exists,       \
 227        }
 228
 229static struct ceph_vxattr ceph_dir_vxattrs[] = {
 230        {
 231                .name = "ceph.dir.layout",
 232                .name_size = sizeof("ceph.dir.layout"),
 233                .getxattr_cb = ceph_vxattrcb_layout,
 234                .readonly = false,
 235                .hidden = true,
 236                .exists_cb = ceph_vxattrcb_layout_exists,
 237        },
 238        XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
 239        XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
 240        XATTR_LAYOUT_FIELD(dir, layout, object_size),
 241        XATTR_LAYOUT_FIELD(dir, layout, pool),
 242        XATTR_NAME_CEPH(dir, entries),
 243        XATTR_NAME_CEPH(dir, files),
 244        XATTR_NAME_CEPH(dir, subdirs),
 245        XATTR_NAME_CEPH(dir, rentries),
 246        XATTR_NAME_CEPH(dir, rfiles),
 247        XATTR_NAME_CEPH(dir, rsubdirs),
 248        XATTR_NAME_CEPH(dir, rbytes),
 249        XATTR_NAME_CEPH(dir, rctime),
 250        { .name = NULL, 0 }     /* Required table terminator */
 251};
 252static size_t ceph_dir_vxattrs_name_size;       /* total size of all names */
 253
 254/* files */
 255
 256static struct ceph_vxattr ceph_file_vxattrs[] = {
 257        {
 258                .name = "ceph.file.layout",
 259                .name_size = sizeof("ceph.file.layout"),
 260                .getxattr_cb = ceph_vxattrcb_layout,
 261                .readonly = false,
 262                .hidden = true,
 263                .exists_cb = ceph_vxattrcb_layout_exists,
 264        },
 265        XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
 266        XATTR_LAYOUT_FIELD(file, layout, stripe_count),
 267        XATTR_LAYOUT_FIELD(file, layout, object_size),
 268        XATTR_LAYOUT_FIELD(file, layout, pool),
 269        { .name = NULL, 0 }     /* Required table terminator */
 270};
 271static size_t ceph_file_vxattrs_name_size;      /* total size of all names */
 272
 273static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
 274{
 275        if (S_ISDIR(inode->i_mode))
 276                return ceph_dir_vxattrs;
 277        else if (S_ISREG(inode->i_mode))
 278                return ceph_file_vxattrs;
 279        return NULL;
 280}
 281
 282static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
 283{
 284        if (vxattrs == ceph_dir_vxattrs)
 285                return ceph_dir_vxattrs_name_size;
 286        if (vxattrs == ceph_file_vxattrs)
 287                return ceph_file_vxattrs_name_size;
 288        BUG_ON(vxattrs);
 289        return 0;
 290}
 291
 292/*
 293 * Compute the aggregate size (including terminating '\0') of all
 294 * virtual extended attribute names in the given vxattr table.
 295 */
 296static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
 297{
 298        struct ceph_vxattr *vxattr;
 299        size_t size = 0;
 300
 301        for (vxattr = vxattrs; vxattr->name; vxattr++)
 302                if (!vxattr->hidden)
 303                        size += vxattr->name_size;
 304
 305        return size;
 306}
 307
 308/* Routines called at initialization and exit time */
 309
 310void __init ceph_xattr_init(void)
 311{
 312        ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
 313        ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
 314}
 315
 316void ceph_xattr_exit(void)
 317{
 318        ceph_dir_vxattrs_name_size = 0;
 319        ceph_file_vxattrs_name_size = 0;
 320}
 321
 322static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
 323                                                const char *name)
 324{
 325        struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
 326
 327        if (vxattr) {
 328                while (vxattr->name) {
 329                        if (!strcmp(vxattr->name, name))
 330                                return vxattr;
 331                        vxattr++;
 332                }
 333        }
 334
 335        return NULL;
 336}
 337
 338static int __set_xattr(struct ceph_inode_info *ci,
 339                           const char *name, int name_len,
 340                           const char *val, int val_len,
 341                           int flags, int update_xattr,
 342                           struct ceph_inode_xattr **newxattr)
 343{
 344        struct rb_node **p;
 345        struct rb_node *parent = NULL;
 346        struct ceph_inode_xattr *xattr = NULL;
 347        int c;
 348        int new = 0;
 349
 350        p = &ci->i_xattrs.index.rb_node;
 351        while (*p) {
 352                parent = *p;
 353                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 354                c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
 355                if (c < 0)
 356                        p = &(*p)->rb_left;
 357                else if (c > 0)
 358                        p = &(*p)->rb_right;
 359                else {
 360                        if (name_len == xattr->name_len)
 361                                break;
 362                        else if (name_len < xattr->name_len)
 363                                p = &(*p)->rb_left;
 364                        else
 365                                p = &(*p)->rb_right;
 366                }
 367                xattr = NULL;
 368        }
 369
 370        if (update_xattr) {
 371                int err = 0;
 372                if (xattr && (flags & XATTR_CREATE))
 373                        err = -EEXIST;
 374                else if (!xattr && (flags & XATTR_REPLACE))
 375                        err = -ENODATA;
 376                if (err) {
 377                        kfree(name);
 378                        kfree(val);
 379                        return err;
 380                }
 381                if (update_xattr < 0) {
 382                        if (xattr)
 383                                __remove_xattr(ci, xattr);
 384                        kfree(name);
 385                        return 0;
 386                }
 387        }
 388
 389        if (!xattr) {
 390                new = 1;
 391                xattr = *newxattr;
 392                xattr->name = name;
 393                xattr->name_len = name_len;
 394                xattr->should_free_name = update_xattr;
 395
 396                ci->i_xattrs.count++;
 397                dout("__set_xattr count=%d\n", ci->i_xattrs.count);
 398        } else {
 399                kfree(*newxattr);
 400                *newxattr = NULL;
 401                if (xattr->should_free_val)
 402                        kfree((void *)xattr->val);
 403
 404                if (update_xattr) {
 405                        kfree((void *)name);
 406                        name = xattr->name;
 407                }
 408                ci->i_xattrs.names_size -= xattr->name_len;
 409                ci->i_xattrs.vals_size -= xattr->val_len;
 410        }
 411        ci->i_xattrs.names_size += name_len;
 412        ci->i_xattrs.vals_size += val_len;
 413        if (val)
 414                xattr->val = val;
 415        else
 416                xattr->val = "";
 417
 418        xattr->val_len = val_len;
 419        xattr->dirty = update_xattr;
 420        xattr->should_free_val = (val && update_xattr);
 421
 422        if (new) {
 423                rb_link_node(&xattr->node, parent, p);
 424                rb_insert_color(&xattr->node, &ci->i_xattrs.index);
 425                dout("__set_xattr_val p=%p\n", p);
 426        }
 427
 428        dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
 429             ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
 430
 431        return 0;
 432}
 433
 434static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
 435                           const char *name)
 436{
 437        struct rb_node **p;
 438        struct rb_node *parent = NULL;
 439        struct ceph_inode_xattr *xattr = NULL;
 440        int name_len = strlen(name);
 441        int c;
 442
 443        p = &ci->i_xattrs.index.rb_node;
 444        while (*p) {
 445                parent = *p;
 446                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 447                c = strncmp(name, xattr->name, xattr->name_len);
 448                if (c == 0 && name_len > xattr->name_len)
 449                        c = 1;
 450                if (c < 0)
 451                        p = &(*p)->rb_left;
 452                else if (c > 0)
 453                        p = &(*p)->rb_right;
 454                else {
 455                        dout("__get_xattr %s: found %.*s\n", name,
 456                             xattr->val_len, xattr->val);
 457                        return xattr;
 458                }
 459        }
 460
 461        dout("__get_xattr %s: not found\n", name);
 462
 463        return NULL;
 464}
 465
 466static void __free_xattr(struct ceph_inode_xattr *xattr)
 467{
 468        BUG_ON(!xattr);
 469
 470        if (xattr->should_free_name)
 471                kfree((void *)xattr->name);
 472        if (xattr->should_free_val)
 473                kfree((void *)xattr->val);
 474
 475        kfree(xattr);
 476}
 477
 478static int __remove_xattr(struct ceph_inode_info *ci,
 479                          struct ceph_inode_xattr *xattr)
 480{
 481        if (!xattr)
 482                return -ENODATA;
 483
 484        rb_erase(&xattr->node, &ci->i_xattrs.index);
 485
 486        if (xattr->should_free_name)
 487                kfree((void *)xattr->name);
 488        if (xattr->should_free_val)
 489                kfree((void *)xattr->val);
 490
 491        ci->i_xattrs.names_size -= xattr->name_len;
 492        ci->i_xattrs.vals_size -= xattr->val_len;
 493        ci->i_xattrs.count--;
 494        kfree(xattr);
 495
 496        return 0;
 497}
 498
 499static int __remove_xattr_by_name(struct ceph_inode_info *ci,
 500                           const char *name)
 501{
 502        struct rb_node **p;
 503        struct ceph_inode_xattr *xattr;
 504        int err;
 505
 506        p = &ci->i_xattrs.index.rb_node;
 507        xattr = __get_xattr(ci, name);
 508        err = __remove_xattr(ci, xattr);
 509        return err;
 510}
 511
 512static char *__copy_xattr_names(struct ceph_inode_info *ci,
 513                                char *dest)
 514{
 515        struct rb_node *p;
 516        struct ceph_inode_xattr *xattr = NULL;
 517
 518        p = rb_first(&ci->i_xattrs.index);
 519        dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
 520
 521        while (p) {
 522                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 523                memcpy(dest, xattr->name, xattr->name_len);
 524                dest[xattr->name_len] = '\0';
 525
 526                dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
 527                     xattr->name_len, ci->i_xattrs.names_size);
 528
 529                dest += xattr->name_len + 1;
 530                p = rb_next(p);
 531        }
 532
 533        return dest;
 534}
 535
 536void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 537{
 538        struct rb_node *p, *tmp;
 539        struct ceph_inode_xattr *xattr = NULL;
 540
 541        p = rb_first(&ci->i_xattrs.index);
 542
 543        dout("__ceph_destroy_xattrs p=%p\n", p);
 544
 545        while (p) {
 546                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 547                tmp = p;
 548                p = rb_next(tmp);
 549                dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
 550                     xattr->name_len, xattr->name);
 551                rb_erase(tmp, &ci->i_xattrs.index);
 552
 553                __free_xattr(xattr);
 554        }
 555
 556        ci->i_xattrs.names_size = 0;
 557        ci->i_xattrs.vals_size = 0;
 558        ci->i_xattrs.index_version = 0;
 559        ci->i_xattrs.count = 0;
 560        ci->i_xattrs.index = RB_ROOT;
 561}
 562
 563static int __build_xattrs(struct inode *inode)
 564        __releases(ci->i_ceph_lock)
 565        __acquires(ci->i_ceph_lock)
 566{
 567        u32 namelen;
 568        u32 numattr = 0;
 569        void *p, *end;
 570        u32 len;
 571        const char *name, *val;
 572        struct ceph_inode_info *ci = ceph_inode(inode);
 573        int xattr_version;
 574        struct ceph_inode_xattr **xattrs = NULL;
 575        int err = 0;
 576        int i;
 577
 578        dout("__build_xattrs() len=%d\n",
 579             ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
 580
 581        if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
 582                return 0; /* already built */
 583
 584        __ceph_destroy_xattrs(ci);
 585
 586start:
 587        /* updated internal xattr rb tree */
 588        if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
 589                p = ci->i_xattrs.blob->vec.iov_base;
 590                end = p + ci->i_xattrs.blob->vec.iov_len;
 591                ceph_decode_32_safe(&p, end, numattr, bad);
 592                xattr_version = ci->i_xattrs.version;
 593                spin_unlock(&ci->i_ceph_lock);
 594
 595                xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
 596                                 GFP_NOFS);
 597                err = -ENOMEM;
 598                if (!xattrs)
 599                        goto bad_lock;
 600
 601                for (i = 0; i < numattr; i++) {
 602                        xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
 603                                            GFP_NOFS);
 604                        if (!xattrs[i])
 605                                goto bad_lock;
 606                }
 607
 608                spin_lock(&ci->i_ceph_lock);
 609                if (ci->i_xattrs.version != xattr_version) {
 610                        /* lost a race, retry */
 611                        for (i = 0; i < numattr; i++)
 612                                kfree(xattrs[i]);
 613                        kfree(xattrs);
 614                        xattrs = NULL;
 615                        goto start;
 616                }
 617                err = -EIO;
 618                while (numattr--) {
 619                        ceph_decode_32_safe(&p, end, len, bad);
 620                        namelen = len;
 621                        name = p;
 622                        p += len;
 623                        ceph_decode_32_safe(&p, end, len, bad);
 624                        val = p;
 625                        p += len;
 626
 627                        err = __set_xattr(ci, name, namelen, val, len,
 628                                          0, 0, &xattrs[numattr]);
 629
 630                        if (err < 0)
 631                                goto bad;
 632                }
 633                kfree(xattrs);
 634        }
 635        ci->i_xattrs.index_version = ci->i_xattrs.version;
 636        ci->i_xattrs.dirty = false;
 637
 638        return err;
 639bad_lock:
 640        spin_lock(&ci->i_ceph_lock);
 641bad:
 642        if (xattrs) {
 643                for (i = 0; i < numattr; i++)
 644                        kfree(xattrs[i]);
 645                kfree(xattrs);
 646        }
 647        ci->i_xattrs.names_size = 0;
 648        return err;
 649}
 650
 651static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
 652                                    int val_size)
 653{
 654        /*
 655         * 4 bytes for the length, and additional 4 bytes per each xattr name,
 656         * 4 bytes per each value
 657         */
 658        int size = 4 + ci->i_xattrs.count*(4 + 4) +
 659                             ci->i_xattrs.names_size +
 660                             ci->i_xattrs.vals_size;
 661        dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
 662             ci->i_xattrs.count, ci->i_xattrs.names_size,
 663             ci->i_xattrs.vals_size);
 664
 665        if (name_size)
 666                size += 4 + 4 + name_size + val_size;
 667
 668        return size;
 669}
 670
 671/*
 672 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
 673 * and swap into place.
 674 */
 675void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
 676{
 677        struct rb_node *p;
 678        struct ceph_inode_xattr *xattr = NULL;
 679        void *dest;
 680
 681        dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
 682        if (ci->i_xattrs.dirty) {
 683                int need = __get_required_blob_size(ci, 0, 0);
 684
 685                BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
 686
 687                p = rb_first(&ci->i_xattrs.index);
 688                dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
 689
 690                ceph_encode_32(&dest, ci->i_xattrs.count);
 691                while (p) {
 692                        xattr = rb_entry(p, struct ceph_inode_xattr, node);
 693
 694                        ceph_encode_32(&dest, xattr->name_len);
 695                        memcpy(dest, xattr->name, xattr->name_len);
 696                        dest += xattr->name_len;
 697                        ceph_encode_32(&dest, xattr->val_len);
 698                        memcpy(dest, xattr->val, xattr->val_len);
 699                        dest += xattr->val_len;
 700
 701                        p = rb_next(p);
 702                }
 703
 704                /* adjust buffer len; it may be larger than we need */
 705                ci->i_xattrs.prealloc_blob->vec.iov_len =
 706                        dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
 707
 708                if (ci->i_xattrs.blob)
 709                        ceph_buffer_put(ci->i_xattrs.blob);
 710                ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
 711                ci->i_xattrs.prealloc_blob = NULL;
 712                ci->i_xattrs.dirty = false;
 713                ci->i_xattrs.version++;
 714        }
 715}
 716
 717ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 718                      size_t size)
 719{
 720        struct ceph_inode_info *ci = ceph_inode(inode);
 721        int err;
 722        struct ceph_inode_xattr *xattr;
 723        struct ceph_vxattr *vxattr = NULL;
 724
 725        if (!ceph_is_valid_xattr(name))
 726                return -ENODATA;
 727
 728        /* let's see if a virtual xattr was requested */
 729        vxattr = ceph_match_vxattr(inode, name);
 730        if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
 731                err = vxattr->getxattr_cb(ci, value, size);
 732                return err;
 733        }
 734
 735        spin_lock(&ci->i_ceph_lock);
 736        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
 737             ci->i_xattrs.version, ci->i_xattrs.index_version);
 738
 739        if (ci->i_xattrs.version == 0 ||
 740            !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
 741                spin_unlock(&ci->i_ceph_lock);
 742                /* get xattrs from mds (if we don't already have them) */
 743                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 744                if (err)
 745                        return err;
 746                spin_lock(&ci->i_ceph_lock);
 747        }
 748
 749        err = __build_xattrs(inode);
 750        if (err < 0)
 751                goto out;
 752
 753        err = -ENODATA;  /* == ENOATTR */
 754        xattr = __get_xattr(ci, name);
 755        if (!xattr)
 756                goto out;
 757
 758        err = -ERANGE;
 759        if (size && size < xattr->val_len)
 760                goto out;
 761
 762        err = xattr->val_len;
 763        if (size == 0)
 764                goto out;
 765
 766        memcpy(value, xattr->val, xattr->val_len);
 767
 768out:
 769        spin_unlock(&ci->i_ceph_lock);
 770        return err;
 771}
 772
 773ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
 774                      size_t size)
 775{
 776        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 777                return generic_getxattr(dentry, name, value, size);
 778
 779        return __ceph_getxattr(d_inode(dentry), name, value, size);
 780}
 781
 782ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 783{
 784        struct inode *inode = d_inode(dentry);
 785        struct ceph_inode_info *ci = ceph_inode(inode);
 786        struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
 787        u32 vir_namelen = 0;
 788        u32 namelen;
 789        int err;
 790        u32 len;
 791        int i;
 792
 793        spin_lock(&ci->i_ceph_lock);
 794        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
 795             ci->i_xattrs.version, ci->i_xattrs.index_version);
 796
 797        if (ci->i_xattrs.version == 0 ||
 798            !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
 799                spin_unlock(&ci->i_ceph_lock);
 800                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 801                if (err)
 802                        return err;
 803                spin_lock(&ci->i_ceph_lock);
 804        }
 805
 806        err = __build_xattrs(inode);
 807        if (err < 0)
 808                goto out;
 809        /*
 810         * Start with virtual dir xattr names (if any) (including
 811         * terminating '\0' characters for each).
 812         */
 813        vir_namelen = ceph_vxattrs_name_size(vxattrs);
 814
 815        /* adding 1 byte per each variable due to the null termination */
 816        namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
 817        err = -ERANGE;
 818        if (size && vir_namelen + namelen > size)
 819                goto out;
 820
 821        err = namelen + vir_namelen;
 822        if (size == 0)
 823                goto out;
 824
 825        names = __copy_xattr_names(ci, names);
 826
 827        /* virtual xattr names, too */
 828        err = namelen;
 829        if (vxattrs) {
 830                for (i = 0; vxattrs[i].name; i++) {
 831                        if (!vxattrs[i].hidden &&
 832                            !(vxattrs[i].exists_cb &&
 833                              !vxattrs[i].exists_cb(ci))) {
 834                                len = sprintf(names, "%s", vxattrs[i].name);
 835                                names += len + 1;
 836                                err += len + 1;
 837                        }
 838                }
 839        }
 840
 841out:
 842        spin_unlock(&ci->i_ceph_lock);
 843        return err;
 844}
 845
 846static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 847                              const char *value, size_t size, int flags)
 848{
 849        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 850        struct inode *inode = d_inode(dentry);
 851        struct ceph_inode_info *ci = ceph_inode(inode);
 852        struct ceph_mds_request *req;
 853        struct ceph_mds_client *mdsc = fsc->mdsc;
 854        struct ceph_pagelist *pagelist = NULL;
 855        int err;
 856
 857        if (size > 0) {
 858                /* copy value into pagelist */
 859                pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
 860                if (!pagelist)
 861                        return -ENOMEM;
 862
 863                ceph_pagelist_init(pagelist);
 864                err = ceph_pagelist_append(pagelist, value, size);
 865                if (err)
 866                        goto out;
 867        } else if (!value) {
 868                flags |= CEPH_XATTR_REMOVE;
 869        }
 870
 871        dout("setxattr value=%.*s\n", (int)size, value);
 872
 873        /* do request */
 874        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
 875                                       USE_AUTH_MDS);
 876        if (IS_ERR(req)) {
 877                err = PTR_ERR(req);
 878                goto out;
 879        }
 880
 881        req->r_args.setxattr.flags = cpu_to_le32(flags);
 882        req->r_path2 = kstrdup(name, GFP_NOFS);
 883        if (!req->r_path2) {
 884                ceph_mdsc_put_request(req);
 885                err = -ENOMEM;
 886                goto out;
 887        }
 888
 889        req->r_pagelist = pagelist;
 890        pagelist = NULL;
 891
 892        req->r_inode = inode;
 893        ihold(inode);
 894        req->r_num_caps = 1;
 895        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 896
 897        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
 898        err = ceph_mdsc_do_request(mdsc, NULL, req);
 899        ceph_mdsc_put_request(req);
 900        dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
 901
 902out:
 903        if (pagelist)
 904                ceph_pagelist_release(pagelist);
 905        return err;
 906}
 907
 908int __ceph_setxattr(struct dentry *dentry, const char *name,
 909                        const void *value, size_t size, int flags)
 910{
 911        struct inode *inode = d_inode(dentry);
 912        struct ceph_vxattr *vxattr;
 913        struct ceph_inode_info *ci = ceph_inode(inode);
 914        struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
 915        struct ceph_cap_flush *prealloc_cf = NULL;
 916        int issued;
 917        int err;
 918        int dirty = 0;
 919        int name_len = strlen(name);
 920        int val_len = size;
 921        char *newname = NULL;
 922        char *newval = NULL;
 923        struct ceph_inode_xattr *xattr = NULL;
 924        int required_blob_size;
 925        bool lock_snap_rwsem = false;
 926
 927        if (!ceph_is_valid_xattr(name))
 928                return -EOPNOTSUPP;
 929
 930        vxattr = ceph_match_vxattr(inode, name);
 931        if (vxattr && vxattr->readonly)
 932                return -EOPNOTSUPP;
 933
 934        /* pass any unhandled ceph.* xattrs through to the MDS */
 935        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
 936                goto do_sync_unlocked;
 937
 938        /* preallocate memory for xattr name, value, index node */
 939        err = -ENOMEM;
 940        newname = kmemdup(name, name_len + 1, GFP_NOFS);
 941        if (!newname)
 942                goto out;
 943
 944        if (val_len) {
 945                newval = kmemdup(value, val_len, GFP_NOFS);
 946                if (!newval)
 947                        goto out;
 948        }
 949
 950        xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
 951        if (!xattr)
 952                goto out;
 953
 954        prealloc_cf = ceph_alloc_cap_flush();
 955        if (!prealloc_cf)
 956                goto out;
 957
 958        spin_lock(&ci->i_ceph_lock);
 959retry:
 960        issued = __ceph_caps_issued(ci, NULL);
 961        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
 962                goto do_sync;
 963
 964        if (!lock_snap_rwsem && !ci->i_head_snapc) {
 965                lock_snap_rwsem = true;
 966                if (!down_read_trylock(&mdsc->snap_rwsem)) {
 967                        spin_unlock(&ci->i_ceph_lock);
 968                        down_read(&mdsc->snap_rwsem);
 969                        spin_lock(&ci->i_ceph_lock);
 970                        goto retry;
 971                }
 972        }
 973
 974        dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
 975        __build_xattrs(inode);
 976
 977        required_blob_size = __get_required_blob_size(ci, name_len, val_len);
 978
 979        if (!ci->i_xattrs.prealloc_blob ||
 980            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
 981                struct ceph_buffer *blob;
 982
 983                spin_unlock(&ci->i_ceph_lock);
 984                dout(" preaallocating new blob size=%d\n", required_blob_size);
 985                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
 986                if (!blob)
 987                        goto do_sync_unlocked;
 988                spin_lock(&ci->i_ceph_lock);
 989                if (ci->i_xattrs.prealloc_blob)
 990                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
 991                ci->i_xattrs.prealloc_blob = blob;
 992                goto retry;
 993        }
 994
 995        err = __set_xattr(ci, newname, name_len, newval, val_len,
 996                          flags, value ? 1 : -1, &xattr);
 997
 998        if (!err) {
 999                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1000                                               &prealloc_cf);
1001                ci->i_xattrs.dirty = true;
1002                inode->i_ctime = CURRENT_TIME;
1003        }
1004
1005        spin_unlock(&ci->i_ceph_lock);
1006        if (lock_snap_rwsem)
1007                up_read(&mdsc->snap_rwsem);
1008        if (dirty)
1009                __mark_inode_dirty(inode, dirty);
1010        ceph_free_cap_flush(prealloc_cf);
1011        return err;
1012
1013do_sync:
1014        spin_unlock(&ci->i_ceph_lock);
1015do_sync_unlocked:
1016        if (lock_snap_rwsem)
1017                up_read(&mdsc->snap_rwsem);
1018        err = ceph_sync_setxattr(dentry, name, value, size, flags);
1019out:
1020        ceph_free_cap_flush(prealloc_cf);
1021        kfree(newname);
1022        kfree(newval);
1023        kfree(xattr);
1024        return err;
1025}
1026
1027int ceph_setxattr(struct dentry *dentry, const char *name,
1028                  const void *value, size_t size, int flags)
1029{
1030        if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
1031                return -EROFS;
1032
1033        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1034                return generic_setxattr(dentry, name, value, size, flags);
1035
1036        if (size == 0)
1037                value = "";  /* empty EA, do not remove */
1038
1039        return __ceph_setxattr(dentry, name, value, size, flags);
1040}
1041
1042static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1043{
1044        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
1045        struct ceph_mds_client *mdsc = fsc->mdsc;
1046        struct inode *inode = d_inode(dentry);
1047        struct ceph_mds_request *req;
1048        int err;
1049
1050        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
1051                                       USE_AUTH_MDS);
1052        if (IS_ERR(req))
1053                return PTR_ERR(req);
1054        req->r_path2 = kstrdup(name, GFP_NOFS);
1055        if (!req->r_path2)
1056                return -ENOMEM;
1057
1058        req->r_inode = inode;
1059        ihold(inode);
1060        req->r_num_caps = 1;
1061        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1062        err = ceph_mdsc_do_request(mdsc, NULL, req);
1063        ceph_mdsc_put_request(req);
1064        return err;
1065}
1066
1067int __ceph_removexattr(struct dentry *dentry, const char *name)
1068{
1069        struct inode *inode = d_inode(dentry);
1070        struct ceph_vxattr *vxattr;
1071        struct ceph_inode_info *ci = ceph_inode(inode);
1072        struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
1073        struct ceph_cap_flush *prealloc_cf = NULL;
1074        int issued;
1075        int err;
1076        int required_blob_size;
1077        int dirty;
1078        bool lock_snap_rwsem = false;
1079
1080        if (!ceph_is_valid_xattr(name))
1081                return -EOPNOTSUPP;
1082
1083        vxattr = ceph_match_vxattr(inode, name);
1084        if (vxattr && vxattr->readonly)
1085                return -EOPNOTSUPP;
1086
1087        /* pass any unhandled ceph.* xattrs through to the MDS */
1088        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1089                goto do_sync_unlocked;
1090
1091        prealloc_cf = ceph_alloc_cap_flush();
1092        if (!prealloc_cf)
1093                return -ENOMEM;
1094
1095        err = -ENOMEM;
1096        spin_lock(&ci->i_ceph_lock);
1097retry:
1098        issued = __ceph_caps_issued(ci, NULL);
1099        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1100                goto do_sync;
1101
1102        if (!lock_snap_rwsem && !ci->i_head_snapc) {
1103                lock_snap_rwsem = true;
1104                if (!down_read_trylock(&mdsc->snap_rwsem)) {
1105                        spin_unlock(&ci->i_ceph_lock);
1106                        down_read(&mdsc->snap_rwsem);
1107                        spin_lock(&ci->i_ceph_lock);
1108                        goto retry;
1109                }
1110        }
1111
1112        dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
1113
1114        __build_xattrs(inode);
1115
1116        required_blob_size = __get_required_blob_size(ci, 0, 0);
1117
1118        if (!ci->i_xattrs.prealloc_blob ||
1119            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1120                struct ceph_buffer *blob;
1121
1122                spin_unlock(&ci->i_ceph_lock);
1123                dout(" preaallocating new blob size=%d\n", required_blob_size);
1124                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1125                if (!blob)
1126                        goto do_sync_unlocked;
1127                spin_lock(&ci->i_ceph_lock);
1128                if (ci->i_xattrs.prealloc_blob)
1129                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1130                ci->i_xattrs.prealloc_blob = blob;
1131                goto retry;
1132        }
1133
1134        err = __remove_xattr_by_name(ceph_inode(inode), name);
1135
1136        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1137                                       &prealloc_cf);
1138        ci->i_xattrs.dirty = true;
1139        inode->i_ctime = CURRENT_TIME;
1140        spin_unlock(&ci->i_ceph_lock);
1141        if (lock_snap_rwsem)
1142                up_read(&mdsc->snap_rwsem);
1143        if (dirty)
1144                __mark_inode_dirty(inode, dirty);
1145        ceph_free_cap_flush(prealloc_cf);
1146        return err;
1147do_sync:
1148        spin_unlock(&ci->i_ceph_lock);
1149do_sync_unlocked:
1150        if (lock_snap_rwsem)
1151                up_read(&mdsc->snap_rwsem);
1152        ceph_free_cap_flush(prealloc_cf);
1153        err = ceph_send_removexattr(dentry, name);
1154        return err;
1155}
1156
1157int ceph_removexattr(struct dentry *dentry, const char *name)
1158{
1159        if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
1160                return -EROFS;
1161
1162        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1163                return generic_removexattr(dentry, name);
1164
1165        return __ceph_removexattr(dentry, name);
1166}
1167