linux/fs/ceph/xattr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/ceph/ceph_debug.h>
   3#include <linux/ceph/pagelist.h>
   4
   5#include "super.h"
   6#include "mds_client.h"
   7
   8#include <linux/ceph/decode.h>
   9
  10#include <linux/xattr.h>
  11#include <linux/posix_acl_xattr.h>
  12#include <linux/slab.h>
  13
  14#define XATTR_CEPH_PREFIX "ceph."
  15#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  16
  17static int __remove_xattr(struct ceph_inode_info *ci,
  18                          struct ceph_inode_xattr *xattr);
  19
  20static const struct xattr_handler ceph_other_xattr_handler;
  21
  22/*
  23 * List of handlers for synthetic system.* attributes. Other
  24 * attributes are handled directly.
  25 */
  26const struct xattr_handler *ceph_xattr_handlers[] = {
  27#ifdef CONFIG_CEPH_FS_POSIX_ACL
  28        &posix_acl_access_xattr_handler,
  29        &posix_acl_default_xattr_handler,
  30#endif
  31        &ceph_other_xattr_handler,
  32        NULL,
  33};
  34
  35static bool ceph_is_valid_xattr(const char *name)
  36{
  37        return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  38               !strncmp(name, XATTR_SECURITY_PREFIX,
  39                        XATTR_SECURITY_PREFIX_LEN) ||
  40               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  41               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  42}
  43
  44/*
  45 * These define virtual xattrs exposing the recursive directory
  46 * statistics and layout metadata.
  47 */
  48struct ceph_vxattr {
  49        char *name;
  50        size_t name_size;       /* strlen(name) + 1 (for '\0') */
  51        size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  52                              size_t size);
  53        bool readonly, hidden;
  54        bool (*exists_cb)(struct ceph_inode_info *ci);
  55};
  56
  57/* layouts */
  58
  59static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  60{
  61        struct ceph_file_layout *fl = &ci->i_layout;
  62        return (fl->stripe_unit > 0 || fl->stripe_count > 0 ||
  63                fl->object_size > 0 || fl->pool_id >= 0 ||
  64                rcu_dereference_raw(fl->pool_ns) != NULL);
  65}
  66
  67static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  68                                   size_t size)
  69{
  70        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  71        struct ceph_osd_client *osdc = &fsc->client->osdc;
  72        struct ceph_string *pool_ns;
  73        s64 pool = ci->i_layout.pool_id;
  74        const char *pool_name;
  75        const char *ns_field = " pool_namespace=";
  76        char buf[128];
  77        size_t len, total_len = 0;
  78        int ret;
  79
  80        pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
  81
  82        dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  83        down_read(&osdc->lock);
  84        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  85        if (pool_name) {
  86                len = snprintf(buf, sizeof(buf),
  87                "stripe_unit=%u stripe_count=%u object_size=%u pool=",
  88                ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  89                ci->i_layout.object_size);
  90                total_len = len + strlen(pool_name);
  91        } else {
  92                len = snprintf(buf, sizeof(buf),
  93                "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld",
  94                ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  95                ci->i_layout.object_size, (unsigned long long)pool);
  96                total_len = len;
  97        }
  98
  99        if (pool_ns)
 100                total_len += strlen(ns_field) + pool_ns->len;
 101
 102        if (!size) {
 103                ret = total_len;
 104        } else if (total_len > size) {
 105                ret = -ERANGE;
 106        } else {
 107                memcpy(val, buf, len);
 108                ret = len;
 109                if (pool_name) {
 110                        len = strlen(pool_name);
 111                        memcpy(val + ret, pool_name, len);
 112                        ret += len;
 113                }
 114                if (pool_ns) {
 115                        len = strlen(ns_field);
 116                        memcpy(val + ret, ns_field, len);
 117                        ret += len;
 118                        memcpy(val + ret, pool_ns->str, pool_ns->len);
 119                        ret += pool_ns->len;
 120                }
 121        }
 122        up_read(&osdc->lock);
 123        ceph_put_string(pool_ns);
 124        return ret;
 125}
 126
 127static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
 128                                               char *val, size_t size)
 129{
 130        return snprintf(val, size, "%u", ci->i_layout.stripe_unit);
 131}
 132
 133static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
 134                                                char *val, size_t size)
 135{
 136        return snprintf(val, size, "%u", ci->i_layout.stripe_count);
 137}
 138
 139static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
 140                                               char *val, size_t size)
 141{
 142        return snprintf(val, size, "%u", ci->i_layout.object_size);
 143}
 144
 145static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
 146                                        char *val, size_t size)
 147{
 148        int ret;
 149        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
 150        struct ceph_osd_client *osdc = &fsc->client->osdc;
 151        s64 pool = ci->i_layout.pool_id;
 152        const char *pool_name;
 153
 154        down_read(&osdc->lock);
 155        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
 156        if (pool_name)
 157                ret = snprintf(val, size, "%s", pool_name);
 158        else
 159                ret = snprintf(val, size, "%lld", (unsigned long long)pool);
 160        up_read(&osdc->lock);
 161        return ret;
 162}
 163
 164static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci,
 165                                                  char *val, size_t size)
 166{
 167        int ret = 0;
 168        struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns);
 169        if (ns) {
 170                ret = snprintf(val, size, "%.*s", (int)ns->len, ns->str);
 171                ceph_put_string(ns);
 172        }
 173        return ret;
 174}
 175
 176/* directories */
 177
 178static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
 179                                        size_t size)
 180{
 181        return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
 182}
 183
 184static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
 185                                      size_t size)
 186{
 187        return snprintf(val, size, "%lld", ci->i_files);
 188}
 189
 190static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
 191                                        size_t size)
 192{
 193        return snprintf(val, size, "%lld", ci->i_subdirs);
 194}
 195
 196static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
 197                                         size_t size)
 198{
 199        return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
 200}
 201
 202static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
 203                                       size_t size)
 204{
 205        return snprintf(val, size, "%lld", ci->i_rfiles);
 206}
 207
 208static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
 209                                         size_t size)
 210{
 211        return snprintf(val, size, "%lld", ci->i_rsubdirs);
 212}
 213
 214static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
 215                                       size_t size)
 216{
 217        return snprintf(val, size, "%lld", ci->i_rbytes);
 218}
 219
 220static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 221                                       size_t size)
 222{
 223        return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
 224                        (long)ci->i_rctime.tv_nsec);
 225}
 226
 227
 228#define CEPH_XATTR_NAME(_type, _name)   XATTR_CEPH_PREFIX #_type "." #_name
 229#define CEPH_XATTR_NAME2(_type, _name, _name2)  \
 230        XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
 231
 232#define XATTR_NAME_CEPH(_type, _name)                                   \
 233        {                                                               \
 234                .name = CEPH_XATTR_NAME(_type, _name),                  \
 235                .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
 236                .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
 237                .readonly = true,                               \
 238                .hidden = false,                                \
 239                .exists_cb = NULL,                      \
 240        }
 241#define XATTR_LAYOUT_FIELD(_type, _name, _field)                        \
 242        {                                                               \
 243                .name = CEPH_XATTR_NAME2(_type, _name, _field), \
 244                .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
 245                .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
 246                .readonly = false,                              \
 247                .hidden = true,                 \
 248                .exists_cb = ceph_vxattrcb_layout_exists,       \
 249        }
 250
 251static struct ceph_vxattr ceph_dir_vxattrs[] = {
 252        {
 253                .name = "ceph.dir.layout",
 254                .name_size = sizeof("ceph.dir.layout"),
 255                .getxattr_cb = ceph_vxattrcb_layout,
 256                .readonly = false,
 257                .hidden = true,
 258                .exists_cb = ceph_vxattrcb_layout_exists,
 259        },
 260        XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
 261        XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
 262        XATTR_LAYOUT_FIELD(dir, layout, object_size),
 263        XATTR_LAYOUT_FIELD(dir, layout, pool),
 264        XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
 265        XATTR_NAME_CEPH(dir, entries),
 266        XATTR_NAME_CEPH(dir, files),
 267        XATTR_NAME_CEPH(dir, subdirs),
 268        XATTR_NAME_CEPH(dir, rentries),
 269        XATTR_NAME_CEPH(dir, rfiles),
 270        XATTR_NAME_CEPH(dir, rsubdirs),
 271        XATTR_NAME_CEPH(dir, rbytes),
 272        XATTR_NAME_CEPH(dir, rctime),
 273        { .name = NULL, 0 }     /* Required table terminator */
 274};
 275static size_t ceph_dir_vxattrs_name_size;       /* total size of all names */
 276
 277/* files */
 278
 279static struct ceph_vxattr ceph_file_vxattrs[] = {
 280        {
 281                .name = "ceph.file.layout",
 282                .name_size = sizeof("ceph.file.layout"),
 283                .getxattr_cb = ceph_vxattrcb_layout,
 284                .readonly = false,
 285                .hidden = true,
 286                .exists_cb = ceph_vxattrcb_layout_exists,
 287        },
 288        XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
 289        XATTR_LAYOUT_FIELD(file, layout, stripe_count),
 290        XATTR_LAYOUT_FIELD(file, layout, object_size),
 291        XATTR_LAYOUT_FIELD(file, layout, pool),
 292        XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
 293        { .name = NULL, 0 }     /* Required table terminator */
 294};
 295static size_t ceph_file_vxattrs_name_size;      /* total size of all names */
 296
 297static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
 298{
 299        if (S_ISDIR(inode->i_mode))
 300                return ceph_dir_vxattrs;
 301        else if (S_ISREG(inode->i_mode))
 302                return ceph_file_vxattrs;
 303        return NULL;
 304}
 305
 306static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
 307{
 308        if (vxattrs == ceph_dir_vxattrs)
 309                return ceph_dir_vxattrs_name_size;
 310        if (vxattrs == ceph_file_vxattrs)
 311                return ceph_file_vxattrs_name_size;
 312        BUG_ON(vxattrs);
 313        return 0;
 314}
 315
 316/*
 317 * Compute the aggregate size (including terminating '\0') of all
 318 * virtual extended attribute names in the given vxattr table.
 319 */
 320static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
 321{
 322        struct ceph_vxattr *vxattr;
 323        size_t size = 0;
 324
 325        for (vxattr = vxattrs; vxattr->name; vxattr++)
 326                if (!vxattr->hidden)
 327                        size += vxattr->name_size;
 328
 329        return size;
 330}
 331
 332/* Routines called at initialization and exit time */
 333
 334void __init ceph_xattr_init(void)
 335{
 336        ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
 337        ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
 338}
 339
 340void ceph_xattr_exit(void)
 341{
 342        ceph_dir_vxattrs_name_size = 0;
 343        ceph_file_vxattrs_name_size = 0;
 344}
 345
 346static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
 347                                                const char *name)
 348{
 349        struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
 350
 351        if (vxattr) {
 352                while (vxattr->name) {
 353                        if (!strcmp(vxattr->name, name))
 354                                return vxattr;
 355                        vxattr++;
 356                }
 357        }
 358
 359        return NULL;
 360}
 361
 362static int __set_xattr(struct ceph_inode_info *ci,
 363                           const char *name, int name_len,
 364                           const char *val, int val_len,
 365                           int flags, int update_xattr,
 366                           struct ceph_inode_xattr **newxattr)
 367{
 368        struct rb_node **p;
 369        struct rb_node *parent = NULL;
 370        struct ceph_inode_xattr *xattr = NULL;
 371        int c;
 372        int new = 0;
 373
 374        p = &ci->i_xattrs.index.rb_node;
 375        while (*p) {
 376                parent = *p;
 377                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 378                c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
 379                if (c < 0)
 380                        p = &(*p)->rb_left;
 381                else if (c > 0)
 382                        p = &(*p)->rb_right;
 383                else {
 384                        if (name_len == xattr->name_len)
 385                                break;
 386                        else if (name_len < xattr->name_len)
 387                                p = &(*p)->rb_left;
 388                        else
 389                                p = &(*p)->rb_right;
 390                }
 391                xattr = NULL;
 392        }
 393
 394        if (update_xattr) {
 395                int err = 0;
 396
 397                if (xattr && (flags & XATTR_CREATE))
 398                        err = -EEXIST;
 399                else if (!xattr && (flags & XATTR_REPLACE))
 400                        err = -ENODATA;
 401                if (err) {
 402                        kfree(name);
 403                        kfree(val);
 404                        kfree(*newxattr);
 405                        return err;
 406                }
 407                if (update_xattr < 0) {
 408                        if (xattr)
 409                                __remove_xattr(ci, xattr);
 410                        kfree(name);
 411                        kfree(*newxattr);
 412                        return 0;
 413                }
 414        }
 415
 416        if (!xattr) {
 417                new = 1;
 418                xattr = *newxattr;
 419                xattr->name = name;
 420                xattr->name_len = name_len;
 421                xattr->should_free_name = update_xattr;
 422
 423                ci->i_xattrs.count++;
 424                dout("__set_xattr count=%d\n", ci->i_xattrs.count);
 425        } else {
 426                kfree(*newxattr);
 427                *newxattr = NULL;
 428                if (xattr->should_free_val)
 429                        kfree((void *)xattr->val);
 430
 431                if (update_xattr) {
 432                        kfree((void *)name);
 433                        name = xattr->name;
 434                }
 435                ci->i_xattrs.names_size -= xattr->name_len;
 436                ci->i_xattrs.vals_size -= xattr->val_len;
 437        }
 438        ci->i_xattrs.names_size += name_len;
 439        ci->i_xattrs.vals_size += val_len;
 440        if (val)
 441                xattr->val = val;
 442        else
 443                xattr->val = "";
 444
 445        xattr->val_len = val_len;
 446        xattr->dirty = update_xattr;
 447        xattr->should_free_val = (val && update_xattr);
 448
 449        if (new) {
 450                rb_link_node(&xattr->node, parent, p);
 451                rb_insert_color(&xattr->node, &ci->i_xattrs.index);
 452                dout("__set_xattr_val p=%p\n", p);
 453        }
 454
 455        dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
 456             ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
 457
 458        return 0;
 459}
 460
 461static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
 462                           const char *name)
 463{
 464        struct rb_node **p;
 465        struct rb_node *parent = NULL;
 466        struct ceph_inode_xattr *xattr = NULL;
 467        int name_len = strlen(name);
 468        int c;
 469
 470        p = &ci->i_xattrs.index.rb_node;
 471        while (*p) {
 472                parent = *p;
 473                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 474                c = strncmp(name, xattr->name, xattr->name_len);
 475                if (c == 0 && name_len > xattr->name_len)
 476                        c = 1;
 477                if (c < 0)
 478                        p = &(*p)->rb_left;
 479                else if (c > 0)
 480                        p = &(*p)->rb_right;
 481                else {
 482                        dout("__get_xattr %s: found %.*s\n", name,
 483                             xattr->val_len, xattr->val);
 484                        return xattr;
 485                }
 486        }
 487
 488        dout("__get_xattr %s: not found\n", name);
 489
 490        return NULL;
 491}
 492
 493static void __free_xattr(struct ceph_inode_xattr *xattr)
 494{
 495        BUG_ON(!xattr);
 496
 497        if (xattr->should_free_name)
 498                kfree((void *)xattr->name);
 499        if (xattr->should_free_val)
 500                kfree((void *)xattr->val);
 501
 502        kfree(xattr);
 503}
 504
 505static int __remove_xattr(struct ceph_inode_info *ci,
 506                          struct ceph_inode_xattr *xattr)
 507{
 508        if (!xattr)
 509                return -ENODATA;
 510
 511        rb_erase(&xattr->node, &ci->i_xattrs.index);
 512
 513        if (xattr->should_free_name)
 514                kfree((void *)xattr->name);
 515        if (xattr->should_free_val)
 516                kfree((void *)xattr->val);
 517
 518        ci->i_xattrs.names_size -= xattr->name_len;
 519        ci->i_xattrs.vals_size -= xattr->val_len;
 520        ci->i_xattrs.count--;
 521        kfree(xattr);
 522
 523        return 0;
 524}
 525
 526static char *__copy_xattr_names(struct ceph_inode_info *ci,
 527                                char *dest)
 528{
 529        struct rb_node *p;
 530        struct ceph_inode_xattr *xattr = NULL;
 531
 532        p = rb_first(&ci->i_xattrs.index);
 533        dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
 534
 535        while (p) {
 536                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 537                memcpy(dest, xattr->name, xattr->name_len);
 538                dest[xattr->name_len] = '\0';
 539
 540                dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
 541                     xattr->name_len, ci->i_xattrs.names_size);
 542
 543                dest += xattr->name_len + 1;
 544                p = rb_next(p);
 545        }
 546
 547        return dest;
 548}
 549
 550void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 551{
 552        struct rb_node *p, *tmp;
 553        struct ceph_inode_xattr *xattr = NULL;
 554
 555        p = rb_first(&ci->i_xattrs.index);
 556
 557        dout("__ceph_destroy_xattrs p=%p\n", p);
 558
 559        while (p) {
 560                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 561                tmp = p;
 562                p = rb_next(tmp);
 563                dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
 564                     xattr->name_len, xattr->name);
 565                rb_erase(tmp, &ci->i_xattrs.index);
 566
 567                __free_xattr(xattr);
 568        }
 569
 570        ci->i_xattrs.names_size = 0;
 571        ci->i_xattrs.vals_size = 0;
 572        ci->i_xattrs.index_version = 0;
 573        ci->i_xattrs.count = 0;
 574        ci->i_xattrs.index = RB_ROOT;
 575}
 576
 577static int __build_xattrs(struct inode *inode)
 578        __releases(ci->i_ceph_lock)
 579        __acquires(ci->i_ceph_lock)
 580{
 581        u32 namelen;
 582        u32 numattr = 0;
 583        void *p, *end;
 584        u32 len;
 585        const char *name, *val;
 586        struct ceph_inode_info *ci = ceph_inode(inode);
 587        int xattr_version;
 588        struct ceph_inode_xattr **xattrs = NULL;
 589        int err = 0;
 590        int i;
 591
 592        dout("__build_xattrs() len=%d\n",
 593             ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
 594
 595        if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
 596                return 0; /* already built */
 597
 598        __ceph_destroy_xattrs(ci);
 599
 600start:
 601        /* updated internal xattr rb tree */
 602        if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
 603                p = ci->i_xattrs.blob->vec.iov_base;
 604                end = p + ci->i_xattrs.blob->vec.iov_len;
 605                ceph_decode_32_safe(&p, end, numattr, bad);
 606                xattr_version = ci->i_xattrs.version;
 607                spin_unlock(&ci->i_ceph_lock);
 608
 609                xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
 610                                 GFP_NOFS);
 611                err = -ENOMEM;
 612                if (!xattrs)
 613                        goto bad_lock;
 614
 615                for (i = 0; i < numattr; i++) {
 616                        xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
 617                                            GFP_NOFS);
 618                        if (!xattrs[i])
 619                                goto bad_lock;
 620                }
 621
 622                spin_lock(&ci->i_ceph_lock);
 623                if (ci->i_xattrs.version != xattr_version) {
 624                        /* lost a race, retry */
 625                        for (i = 0; i < numattr; i++)
 626                                kfree(xattrs[i]);
 627                        kfree(xattrs);
 628                        xattrs = NULL;
 629                        goto start;
 630                }
 631                err = -EIO;
 632                while (numattr--) {
 633                        ceph_decode_32_safe(&p, end, len, bad);
 634                        namelen = len;
 635                        name = p;
 636                        p += len;
 637                        ceph_decode_32_safe(&p, end, len, bad);
 638                        val = p;
 639                        p += len;
 640
 641                        err = __set_xattr(ci, name, namelen, val, len,
 642                                          0, 0, &xattrs[numattr]);
 643
 644                        if (err < 0)
 645                                goto bad;
 646                }
 647                kfree(xattrs);
 648        }
 649        ci->i_xattrs.index_version = ci->i_xattrs.version;
 650        ci->i_xattrs.dirty = false;
 651
 652        return err;
 653bad_lock:
 654        spin_lock(&ci->i_ceph_lock);
 655bad:
 656        if (xattrs) {
 657                for (i = 0; i < numattr; i++)
 658                        kfree(xattrs[i]);
 659                kfree(xattrs);
 660        }
 661        ci->i_xattrs.names_size = 0;
 662        return err;
 663}
 664
 665static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
 666                                    int val_size)
 667{
 668        /*
 669         * 4 bytes for the length, and additional 4 bytes per each xattr name,
 670         * 4 bytes per each value
 671         */
 672        int size = 4 + ci->i_xattrs.count*(4 + 4) +
 673                             ci->i_xattrs.names_size +
 674                             ci->i_xattrs.vals_size;
 675        dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
 676             ci->i_xattrs.count, ci->i_xattrs.names_size,
 677             ci->i_xattrs.vals_size);
 678
 679        if (name_size)
 680                size += 4 + 4 + name_size + val_size;
 681
 682        return size;
 683}
 684
 685/*
 686 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
 687 * and swap into place.
 688 */
 689void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
 690{
 691        struct rb_node *p;
 692        struct ceph_inode_xattr *xattr = NULL;
 693        void *dest;
 694
 695        dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
 696        if (ci->i_xattrs.dirty) {
 697                int need = __get_required_blob_size(ci, 0, 0);
 698
 699                BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
 700
 701                p = rb_first(&ci->i_xattrs.index);
 702                dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
 703
 704                ceph_encode_32(&dest, ci->i_xattrs.count);
 705                while (p) {
 706                        xattr = rb_entry(p, struct ceph_inode_xattr, node);
 707
 708                        ceph_encode_32(&dest, xattr->name_len);
 709                        memcpy(dest, xattr->name, xattr->name_len);
 710                        dest += xattr->name_len;
 711                        ceph_encode_32(&dest, xattr->val_len);
 712                        memcpy(dest, xattr->val, xattr->val_len);
 713                        dest += xattr->val_len;
 714
 715                        p = rb_next(p);
 716                }
 717
 718                /* adjust buffer len; it may be larger than we need */
 719                ci->i_xattrs.prealloc_blob->vec.iov_len =
 720                        dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
 721
 722                if (ci->i_xattrs.blob)
 723                        ceph_buffer_put(ci->i_xattrs.blob);
 724                ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
 725                ci->i_xattrs.prealloc_blob = NULL;
 726                ci->i_xattrs.dirty = false;
 727                ci->i_xattrs.version++;
 728        }
 729}
 730
 731static inline int __get_request_mask(struct inode *in) {
 732        struct ceph_mds_request *req = current->journal_info;
 733        int mask = 0;
 734        if (req && req->r_target_inode == in) {
 735                if (req->r_op == CEPH_MDS_OP_LOOKUP ||
 736                    req->r_op == CEPH_MDS_OP_LOOKUPINO ||
 737                    req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
 738                    req->r_op == CEPH_MDS_OP_GETATTR) {
 739                        mask = le32_to_cpu(req->r_args.getattr.mask);
 740                } else if (req->r_op == CEPH_MDS_OP_OPEN ||
 741                           req->r_op == CEPH_MDS_OP_CREATE) {
 742                        mask = le32_to_cpu(req->r_args.open.mask);
 743                }
 744        }
 745        return mask;
 746}
 747
 748ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 749                      size_t size)
 750{
 751        struct ceph_inode_info *ci = ceph_inode(inode);
 752        struct ceph_inode_xattr *xattr;
 753        struct ceph_vxattr *vxattr = NULL;
 754        int req_mask;
 755        int err;
 756
 757        /* let's see if a virtual xattr was requested */
 758        vxattr = ceph_match_vxattr(inode, name);
 759        if (vxattr) {
 760                err = ceph_do_getattr(inode, 0, true);
 761                if (err)
 762                        return err;
 763                err = -ENODATA;
 764                if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
 765                        err = vxattr->getxattr_cb(ci, value, size);
 766                return err;
 767        }
 768
 769        req_mask = __get_request_mask(inode);
 770
 771        spin_lock(&ci->i_ceph_lock);
 772        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
 773             ci->i_xattrs.version, ci->i_xattrs.index_version);
 774
 775        if (ci->i_xattrs.version == 0 ||
 776            !((req_mask & CEPH_CAP_XATTR_SHARED) ||
 777              __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
 778                spin_unlock(&ci->i_ceph_lock);
 779
 780                /* security module gets xattr while filling trace */
 781                if (current->journal_info) {
 782                        pr_warn_ratelimited("sync getxattr %p "
 783                                            "during filling trace\n", inode);
 784                        return -EBUSY;
 785                }
 786
 787                /* get xattrs from mds (if we don't already have them) */
 788                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 789                if (err)
 790                        return err;
 791                spin_lock(&ci->i_ceph_lock);
 792        }
 793
 794        err = __build_xattrs(inode);
 795        if (err < 0)
 796                goto out;
 797
 798        err = -ENODATA;  /* == ENOATTR */
 799        xattr = __get_xattr(ci, name);
 800        if (!xattr)
 801                goto out;
 802
 803        err = -ERANGE;
 804        if (size && size < xattr->val_len)
 805                goto out;
 806
 807        err = xattr->val_len;
 808        if (size == 0)
 809                goto out;
 810
 811        memcpy(value, xattr->val, xattr->val_len);
 812
 813        if (current->journal_info &&
 814            !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
 815                ci->i_ceph_flags |= CEPH_I_SEC_INITED;
 816out:
 817        spin_unlock(&ci->i_ceph_lock);
 818        return err;
 819}
 820
 821ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 822{
 823        struct inode *inode = d_inode(dentry);
 824        struct ceph_inode_info *ci = ceph_inode(inode);
 825        struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
 826        u32 vir_namelen = 0;
 827        u32 namelen;
 828        int err;
 829        u32 len;
 830        int i;
 831
 832        spin_lock(&ci->i_ceph_lock);
 833        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
 834             ci->i_xattrs.version, ci->i_xattrs.index_version);
 835
 836        if (ci->i_xattrs.version == 0 ||
 837            !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
 838                spin_unlock(&ci->i_ceph_lock);
 839                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 840                if (err)
 841                        return err;
 842                spin_lock(&ci->i_ceph_lock);
 843        }
 844
 845        err = __build_xattrs(inode);
 846        if (err < 0)
 847                goto out;
 848        /*
 849         * Start with virtual dir xattr names (if any) (including
 850         * terminating '\0' characters for each).
 851         */
 852        vir_namelen = ceph_vxattrs_name_size(vxattrs);
 853
 854        /* adding 1 byte per each variable due to the null termination */
 855        namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
 856        err = -ERANGE;
 857        if (size && vir_namelen + namelen > size)
 858                goto out;
 859
 860        err = namelen + vir_namelen;
 861        if (size == 0)
 862                goto out;
 863
 864        names = __copy_xattr_names(ci, names);
 865
 866        /* virtual xattr names, too */
 867        err = namelen;
 868        if (vxattrs) {
 869                for (i = 0; vxattrs[i].name; i++) {
 870                        if (!vxattrs[i].hidden &&
 871                            !(vxattrs[i].exists_cb &&
 872                              !vxattrs[i].exists_cb(ci))) {
 873                                len = sprintf(names, "%s", vxattrs[i].name);
 874                                names += len + 1;
 875                                err += len + 1;
 876                        }
 877                }
 878        }
 879
 880out:
 881        spin_unlock(&ci->i_ceph_lock);
 882        return err;
 883}
 884
 885static int ceph_sync_setxattr(struct inode *inode, const char *name,
 886                              const char *value, size_t size, int flags)
 887{
 888        struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
 889        struct ceph_inode_info *ci = ceph_inode(inode);
 890        struct ceph_mds_request *req;
 891        struct ceph_mds_client *mdsc = fsc->mdsc;
 892        struct ceph_pagelist *pagelist = NULL;
 893        int op = CEPH_MDS_OP_SETXATTR;
 894        int err;
 895
 896        if (size > 0) {
 897                /* copy value into pagelist */
 898                pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
 899                if (!pagelist)
 900                        return -ENOMEM;
 901
 902                ceph_pagelist_init(pagelist);
 903                err = ceph_pagelist_append(pagelist, value, size);
 904                if (err)
 905                        goto out;
 906        } else if (!value) {
 907                if (flags & CEPH_XATTR_REPLACE)
 908                        op = CEPH_MDS_OP_RMXATTR;
 909                else
 910                        flags |= CEPH_XATTR_REMOVE;
 911        }
 912
 913        dout("setxattr value=%.*s\n", (int)size, value);
 914
 915        /* do request */
 916        req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 917        if (IS_ERR(req)) {
 918                err = PTR_ERR(req);
 919                goto out;
 920        }
 921
 922        req->r_path2 = kstrdup(name, GFP_NOFS);
 923        if (!req->r_path2) {
 924                ceph_mdsc_put_request(req);
 925                err = -ENOMEM;
 926                goto out;
 927        }
 928
 929        if (op == CEPH_MDS_OP_SETXATTR) {
 930                req->r_args.setxattr.flags = cpu_to_le32(flags);
 931                req->r_pagelist = pagelist;
 932                pagelist = NULL;
 933        }
 934
 935        req->r_inode = inode;
 936        ihold(inode);
 937        req->r_num_caps = 1;
 938        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 939
 940        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
 941        err = ceph_mdsc_do_request(mdsc, NULL, req);
 942        ceph_mdsc_put_request(req);
 943        dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
 944
 945out:
 946        if (pagelist)
 947                ceph_pagelist_release(pagelist);
 948        return err;
 949}
 950
/*
 * Set the xattr @name on @inode to the @size bytes at @value.
 *
 * When the xattrs are loaded (i_xattrs.version != 0) and
 * CEPH_CAP_XATTR_EXCL is issued, the change is applied to the in-memory
 * xattrs through __set_xattr() and the cap is marked dirty.  In every
 * other case, and for all ceph.* names, the request goes to
 * ceph_sync_setxattr().
 *
 * Returns the result of __set_xattr() or ceph_sync_setxattr(), or:
 *   -EROFS       ceph_snap(inode) is not CEPH_NOSNAP
 *   -EOPNOTSUPP  @name matches a read-only virtual xattr
 *   -ENOMEM      one of the up-front allocations failed
 *   -EBUSY       the sync path is needed while current->journal_info
 *                is set
 */
int __ceph_setxattr(struct inode *inode, const char *name,
                        const void *value, size_t size, int flags)
{
        struct ceph_vxattr *vxattr;
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_cap_flush *prealloc_cf = NULL;
        int issued;
        int err;
        int dirty = 0;
        int name_len = strlen(name);
        int val_len = size;
        char *newname = NULL;
        char *newval = NULL;
        struct ceph_inode_xattr *xattr = NULL;
        int required_blob_size;
        bool lock_snap_rwsem = false;

        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;

        vxattr = ceph_match_vxattr(inode, name);
        if (vxattr && vxattr->readonly)
                return -EOPNOTSUPP;

        /* pass any unhandled ceph.* xattrs through to the MDS */
        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
                goto do_sync_unlocked;

        /* preallocate memory for xattr name, value, index node */
        err = -ENOMEM;
        newname = kmemdup(name, name_len + 1, GFP_NOFS);
        if (!newname)
                goto out;

        if (val_len) {
                newval = kmemdup(value, val_len, GFP_NOFS);
                if (!newval)
                        goto out;
        }

        xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
        if (!xattr)
                goto out;

        prealloc_cf = ceph_alloc_cap_flush();
        if (!prealloc_cf)
                goto out;

        spin_lock(&ci->i_ceph_lock);
retry:
        /*
         * Re-evaluated after every point where i_ceph_lock was dropped.
         * Without loaded xattrs or the exclusive xattr cap the update
         * cannot be done locally.
         */
        issued = __ceph_caps_issued(ci, NULL);
        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
                goto do_sync;

        /*
         * Take snap_rwsem for reading when the inode has no i_head_snapc.
         * lock_snap_rwsem is set before the attempt, so this block runs
         * at most once and every exit path knows to release the rwsem.
         * If the trylock fails, the spinlock is dropped for the blocking
         * down_read() and the checks above are repeated.
         */
        if (!lock_snap_rwsem && !ci->i_head_snapc) {
                lock_snap_rwsem = true;
                if (!down_read_trylock(&mdsc->snap_rwsem)) {
                        spin_unlock(&ci->i_ceph_lock);
                        down_read(&mdsc->snap_rwsem);
                        spin_lock(&ci->i_ceph_lock);
                        goto retry;
                }
        }

        dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
        /* NOTE(review): the return value of __build_xattrs() is ignored */
        __build_xattrs(inode);

        required_blob_size = __get_required_blob_size(ci, name_len, val_len);

        /*
         * Make sure prealloc_blob exists and is large enough.  The new
         * buffer is allocated with the spinlock dropped; if that fails
         * the request falls back to the sync path.  After installing the
         * buffer, start over from the cap check.
         */
        if (!ci->i_xattrs.prealloc_blob ||
            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
                struct ceph_buffer *blob;

                spin_unlock(&ci->i_ceph_lock);
                dout(" preaallocating new blob size=%d\n", required_blob_size);
                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
                if (!blob)
                        goto do_sync_unlocked;
                spin_lock(&ci->i_ceph_lock);
                if (ci->i_xattrs.prealloc_blob)
                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
                ci->i_xattrs.prealloc_blob = blob;
                goto retry;
        }

        /*
         * A NULL @value is passed on as -1 instead of 1.
         * NOTE(review): presumably -1 requests removal, and __set_xattr()
         * takes over newname/newval/xattr whatever its result -- nothing
         * below frees them on this path; confirm against __set_xattr().
         */
        err = __set_xattr(ci, newname, name_len, newval, val_len,
                          flags, value ? 1 : -1, &xattr);

        if (!err) {
                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
                                               &prealloc_cf);
                ci->i_xattrs.dirty = true;
                inode->i_ctime = current_time(inode);
        }

        spin_unlock(&ci->i_ceph_lock);
        if (lock_snap_rwsem)
                up_read(&mdsc->snap_rwsem);
        if (dirty)
                __mark_inode_dirty(inode, dirty);
        ceph_free_cap_flush(prealloc_cf);
        return err;

do_sync:
        spin_unlock(&ci->i_ceph_lock);
do_sync_unlocked:
        if (lock_snap_rwsem)
                up_read(&mdsc->snap_rwsem);

        /* security module set xattr while filling trace */
        if (current->journal_info) {
                pr_warn_ratelimited("sync setxattr %p "
                                    "during filling trace\n", inode);
                err = -EBUSY;
        } else {
                err = ceph_sync_setxattr(inode, name, value, size, flags);
        }
out:
        /* the local update did not happen: drop everything preallocated */
        ceph_free_cap_flush(prealloc_cf);
        kfree(newname);
        kfree(newval);
        kfree(xattr);
        return err;
}
1076
1077static int ceph_get_xattr_handler(const struct xattr_handler *handler,
1078                                  struct dentry *dentry, struct inode *inode,
1079                                  const char *name, void *value, size_t size)
1080{
1081        if (!ceph_is_valid_xattr(name))
1082                return -EOPNOTSUPP;
1083        return __ceph_getxattr(inode, name, value, size);
1084}
1085
1086static int ceph_set_xattr_handler(const struct xattr_handler *handler,
1087                                  struct dentry *unused, struct inode *inode,
1088                                  const char *name, const void *value,
1089                                  size_t size, int flags)
1090{
1091        if (!ceph_is_valid_xattr(name))
1092                return -EOPNOTSUPP;
1093        return __ceph_setxattr(inode, name, value, size, flags);
1094}
1095
1096static const struct xattr_handler ceph_other_xattr_handler = {
1097        .prefix = "",  /* match any name => handlers called with full name */
1098        .get = ceph_get_xattr_handler,
1099        .set = ceph_set_xattr_handler,
1100};
1101
1102#ifdef CONFIG_SECURITY
1103bool ceph_security_xattr_wanted(struct inode *in)
1104{
1105        return in->i_security != NULL;
1106}
1107
1108bool ceph_security_xattr_deadlock(struct inode *in)
1109{
1110        struct ceph_inode_info *ci;
1111        bool ret;
1112        if (!in->i_security)
1113                return false;
1114        ci = ceph_inode(in);
1115        spin_lock(&ci->i_ceph_lock);
1116        ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
1117              !(ci->i_xattrs.version > 0 &&
1118                __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
1119        spin_unlock(&ci->i_ceph_lock);
1120        return ret;
1121}
1122#endif
1123