linux/fs/ceph/xattr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/ceph/ceph_debug.h>
   3#include <linux/ceph/pagelist.h>
   4
   5#include "super.h"
   6#include "mds_client.h"
   7
   8#include <linux/ceph/decode.h>
   9
  10#include <linux/xattr.h>
  11#include <linux/posix_acl_xattr.h>
  12#include <linux/slab.h>
  13
  14#define XATTR_CEPH_PREFIX "ceph."
  15#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  16
  17static int __remove_xattr(struct ceph_inode_info *ci,
  18                          struct ceph_inode_xattr *xattr);
  19
  20static const struct xattr_handler ceph_other_xattr_handler;
  21
  22/*
  23 * List of handlers for synthetic system.* attributes. Other
  24 * attributes are handled directly.
  25 */
  26const struct xattr_handler *ceph_xattr_handlers[] = {
  27#ifdef CONFIG_CEPH_FS_POSIX_ACL
  28        &posix_acl_access_xattr_handler,
  29        &posix_acl_default_xattr_handler,
  30#endif
  31        &ceph_other_xattr_handler,
  32        NULL,
  33};
  34
  35static bool ceph_is_valid_xattr(const char *name)
  36{
  37        return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  38               !strncmp(name, XATTR_SECURITY_PREFIX,
  39                        XATTR_SECURITY_PREFIX_LEN) ||
  40               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  41               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  42}
  43
  44/*
  45 * These define virtual xattrs exposing the recursive directory
  46 * statistics and layout metadata.
  47 */
  48struct ceph_vxattr {
  49        char *name;
  50        size_t name_size;       /* strlen(name) + 1 (for '\0') */
  51        size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  52                              size_t size);
  53        bool (*exists_cb)(struct ceph_inode_info *ci);
  54        unsigned int flags;
  55};
  56
  57#define VXATTR_FLAG_READONLY            (1<<0)
  58#define VXATTR_FLAG_HIDDEN              (1<<1)
  59#define VXATTR_FLAG_RSTAT               (1<<2)
  60
  61/* layouts */
  62
  63static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  64{
  65        struct ceph_file_layout *fl = &ci->i_layout;
  66        return (fl->stripe_unit > 0 || fl->stripe_count > 0 ||
  67                fl->object_size > 0 || fl->pool_id >= 0 ||
  68                rcu_dereference_raw(fl->pool_ns) != NULL);
  69}
  70
  71static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  72                                   size_t size)
  73{
  74        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  75        struct ceph_osd_client *osdc = &fsc->client->osdc;
  76        struct ceph_string *pool_ns;
  77        s64 pool = ci->i_layout.pool_id;
  78        const char *pool_name;
  79        const char *ns_field = " pool_namespace=";
  80        char buf[128];
  81        size_t len, total_len = 0;
  82        int ret;
  83
  84        pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
  85
  86        dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  87        down_read(&osdc->lock);
  88        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  89        if (pool_name) {
  90                len = snprintf(buf, sizeof(buf),
  91                "stripe_unit=%u stripe_count=%u object_size=%u pool=",
  92                ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  93                ci->i_layout.object_size);
  94                total_len = len + strlen(pool_name);
  95        } else {
  96                len = snprintf(buf, sizeof(buf),
  97                "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld",
  98                ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  99                ci->i_layout.object_size, (unsigned long long)pool);
 100                total_len = len;
 101        }
 102
 103        if (pool_ns)
 104                total_len += strlen(ns_field) + pool_ns->len;
 105
 106        if (!size) {
 107                ret = total_len;
 108        } else if (total_len > size) {
 109                ret = -ERANGE;
 110        } else {
 111                memcpy(val, buf, len);
 112                ret = len;
 113                if (pool_name) {
 114                        len = strlen(pool_name);
 115                        memcpy(val + ret, pool_name, len);
 116                        ret += len;
 117                }
 118                if (pool_ns) {
 119                        len = strlen(ns_field);
 120                        memcpy(val + ret, ns_field, len);
 121                        ret += len;
 122                        memcpy(val + ret, pool_ns->str, pool_ns->len);
 123                        ret += pool_ns->len;
 124                }
 125        }
 126        up_read(&osdc->lock);
 127        ceph_put_string(pool_ns);
 128        return ret;
 129}
 130
 131static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
 132                                               char *val, size_t size)
 133{
 134        return snprintf(val, size, "%u", ci->i_layout.stripe_unit);
 135}
 136
 137static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
 138                                                char *val, size_t size)
 139{
 140        return snprintf(val, size, "%u", ci->i_layout.stripe_count);
 141}
 142
 143static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
 144                                               char *val, size_t size)
 145{
 146        return snprintf(val, size, "%u", ci->i_layout.object_size);
 147}
 148
 149static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
 150                                        char *val, size_t size)
 151{
 152        int ret;
 153        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
 154        struct ceph_osd_client *osdc = &fsc->client->osdc;
 155        s64 pool = ci->i_layout.pool_id;
 156        const char *pool_name;
 157
 158        down_read(&osdc->lock);
 159        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
 160        if (pool_name)
 161                ret = snprintf(val, size, "%s", pool_name);
 162        else
 163                ret = snprintf(val, size, "%lld", (unsigned long long)pool);
 164        up_read(&osdc->lock);
 165        return ret;
 166}
 167
 168static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci,
 169                                                  char *val, size_t size)
 170{
 171        int ret = 0;
 172        struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns);
 173        if (ns) {
 174                ret = snprintf(val, size, "%.*s", (int)ns->len, ns->str);
 175                ceph_put_string(ns);
 176        }
 177        return ret;
 178}
 179
 180/* directories */
 181
 182static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
 183                                        size_t size)
 184{
 185        return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
 186}
 187
 188static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
 189                                      size_t size)
 190{
 191        return snprintf(val, size, "%lld", ci->i_files);
 192}
 193
 194static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
 195                                        size_t size)
 196{
 197        return snprintf(val, size, "%lld", ci->i_subdirs);
 198}
 199
 200static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
 201                                         size_t size)
 202{
 203        return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
 204}
 205
 206static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
 207                                       size_t size)
 208{
 209        return snprintf(val, size, "%lld", ci->i_rfiles);
 210}
 211
 212static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
 213                                         size_t size)
 214{
 215        return snprintf(val, size, "%lld", ci->i_rsubdirs);
 216}
 217
 218static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
 219                                       size_t size)
 220{
 221        return snprintf(val, size, "%lld", ci->i_rbytes);
 222}
 223
 224static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 225                                       size_t size)
 226{
 227        return snprintf(val, size, "%lld.09%ld", ci->i_rctime.tv_sec,
 228                        ci->i_rctime.tv_nsec);
 229}
 230
 231/* quotas */
 232
 233static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
 234{
 235        bool ret = false;
 236        spin_lock(&ci->i_ceph_lock);
 237        if ((ci->i_max_files || ci->i_max_bytes) &&
 238            ci->i_vino.snap == CEPH_NOSNAP &&
 239            ci->i_snap_realm &&
 240            ci->i_snap_realm->ino == ci->i_vino.ino)
 241                ret = true;
 242        spin_unlock(&ci->i_ceph_lock);
 243        return ret;
 244}
 245
 246static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
 247                                  size_t size)
 248{
 249        return snprintf(val, size, "max_bytes=%llu max_files=%llu",
 250                        ci->i_max_bytes, ci->i_max_files);
 251}
 252
 253static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
 254                                            char *val, size_t size)
 255{
 256        return snprintf(val, size, "%llu", ci->i_max_bytes);
 257}
 258
 259static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
 260                                            char *val, size_t size)
 261{
 262        return snprintf(val, size, "%llu", ci->i_max_files);
 263}
 264
 265#define CEPH_XATTR_NAME(_type, _name)   XATTR_CEPH_PREFIX #_type "." #_name
 266#define CEPH_XATTR_NAME2(_type, _name, _name2)  \
 267        XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
 268
 269#define XATTR_NAME_CEPH(_type, _name, _flags)                           \
 270        {                                                               \
 271                .name = CEPH_XATTR_NAME(_type, _name),                  \
 272                .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
 273                .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
 274                .exists_cb = NULL,                                      \
 275                .flags = (VXATTR_FLAG_READONLY | _flags),               \
 276        }
 277#define XATTR_RSTAT_FIELD(_type, _name)                 \
 278        XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)
 279#define XATTR_LAYOUT_FIELD(_type, _name, _field)                        \
 280        {                                                               \
 281                .name = CEPH_XATTR_NAME2(_type, _name, _field), \
 282                .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
 283                .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
 284                .exists_cb = ceph_vxattrcb_layout_exists,       \
 285                .flags = VXATTR_FLAG_HIDDEN,                    \
 286        }
 287#define XATTR_QUOTA_FIELD(_type, _name)                                 \
 288        {                                                               \
 289                .name = CEPH_XATTR_NAME(_type, _name),                  \
 290                .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),     \
 291                .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,   \
 292                .exists_cb = ceph_vxattrcb_quota_exists,                \
 293                .flags = VXATTR_FLAG_HIDDEN,                            \
 294        }
 295
 296static struct ceph_vxattr ceph_dir_vxattrs[] = {
 297        {
 298                .name = "ceph.dir.layout",
 299                .name_size = sizeof("ceph.dir.layout"),
 300                .getxattr_cb = ceph_vxattrcb_layout,
 301                .exists_cb = ceph_vxattrcb_layout_exists,
 302                .flags = VXATTR_FLAG_HIDDEN,
 303        },
 304        XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
 305        XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
 306        XATTR_LAYOUT_FIELD(dir, layout, object_size),
 307        XATTR_LAYOUT_FIELD(dir, layout, pool),
 308        XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
 309        XATTR_NAME_CEPH(dir, entries, 0),
 310        XATTR_NAME_CEPH(dir, files, 0),
 311        XATTR_NAME_CEPH(dir, subdirs, 0),
 312        XATTR_RSTAT_FIELD(dir, rentries),
 313        XATTR_RSTAT_FIELD(dir, rfiles),
 314        XATTR_RSTAT_FIELD(dir, rsubdirs),
 315        XATTR_RSTAT_FIELD(dir, rbytes),
 316        XATTR_RSTAT_FIELD(dir, rctime),
 317        {
 318                .name = "ceph.quota",
 319                .name_size = sizeof("ceph.quota"),
 320                .getxattr_cb = ceph_vxattrcb_quota,
 321                .exists_cb = ceph_vxattrcb_quota_exists,
 322                .flags = VXATTR_FLAG_HIDDEN,
 323        },
 324        XATTR_QUOTA_FIELD(quota, max_bytes),
 325        XATTR_QUOTA_FIELD(quota, max_files),
 326        { .name = NULL, 0 }     /* Required table terminator */
 327};
 328static size_t ceph_dir_vxattrs_name_size;       /* total size of all names */
 329
 330/* files */
 331
 332static struct ceph_vxattr ceph_file_vxattrs[] = {
 333        {
 334                .name = "ceph.file.layout",
 335                .name_size = sizeof("ceph.file.layout"),
 336                .getxattr_cb = ceph_vxattrcb_layout,
 337                .exists_cb = ceph_vxattrcb_layout_exists,
 338                .flags = VXATTR_FLAG_HIDDEN,
 339        },
 340        XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
 341        XATTR_LAYOUT_FIELD(file, layout, stripe_count),
 342        XATTR_LAYOUT_FIELD(file, layout, object_size),
 343        XATTR_LAYOUT_FIELD(file, layout, pool),
 344        XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
 345        { .name = NULL, 0 }     /* Required table terminator */
 346};
 347static size_t ceph_file_vxattrs_name_size;      /* total size of all names */
 348
 349static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
 350{
 351        if (S_ISDIR(inode->i_mode))
 352                return ceph_dir_vxattrs;
 353        else if (S_ISREG(inode->i_mode))
 354                return ceph_file_vxattrs;
 355        return NULL;
 356}
 357
 358static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
 359{
 360        if (vxattrs == ceph_dir_vxattrs)
 361                return ceph_dir_vxattrs_name_size;
 362        if (vxattrs == ceph_file_vxattrs)
 363                return ceph_file_vxattrs_name_size;
 364        BUG_ON(vxattrs);
 365        return 0;
 366}
 367
 368/*
 369 * Compute the aggregate size (including terminating '\0') of all
 370 * virtual extended attribute names in the given vxattr table.
 371 */
 372static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
 373{
 374        struct ceph_vxattr *vxattr;
 375        size_t size = 0;
 376
 377        for (vxattr = vxattrs; vxattr->name; vxattr++) {
 378                if (!(vxattr->flags & VXATTR_FLAG_HIDDEN))
 379                        size += vxattr->name_size;
 380        }
 381
 382        return size;
 383}
 384
 385/* Routines called at initialization and exit time */
 386
 387void __init ceph_xattr_init(void)
 388{
 389        ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
 390        ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
 391}
 392
 393void ceph_xattr_exit(void)
 394{
 395        ceph_dir_vxattrs_name_size = 0;
 396        ceph_file_vxattrs_name_size = 0;
 397}
 398
 399static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
 400                                                const char *name)
 401{
 402        struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
 403
 404        if (vxattr) {
 405                while (vxattr->name) {
 406                        if (!strcmp(vxattr->name, name))
 407                                return vxattr;
 408                        vxattr++;
 409                }
 410        }
 411
 412        return NULL;
 413}
 414
 415static int __set_xattr(struct ceph_inode_info *ci,
 416                           const char *name, int name_len,
 417                           const char *val, int val_len,
 418                           int flags, int update_xattr,
 419                           struct ceph_inode_xattr **newxattr)
 420{
 421        struct rb_node **p;
 422        struct rb_node *parent = NULL;
 423        struct ceph_inode_xattr *xattr = NULL;
 424        int c;
 425        int new = 0;
 426
 427        p = &ci->i_xattrs.index.rb_node;
 428        while (*p) {
 429                parent = *p;
 430                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 431                c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
 432                if (c < 0)
 433                        p = &(*p)->rb_left;
 434                else if (c > 0)
 435                        p = &(*p)->rb_right;
 436                else {
 437                        if (name_len == xattr->name_len)
 438                                break;
 439                        else if (name_len < xattr->name_len)
 440                                p = &(*p)->rb_left;
 441                        else
 442                                p = &(*p)->rb_right;
 443                }
 444                xattr = NULL;
 445        }
 446
 447        if (update_xattr) {
 448                int err = 0;
 449
 450                if (xattr && (flags & XATTR_CREATE))
 451                        err = -EEXIST;
 452                else if (!xattr && (flags & XATTR_REPLACE))
 453                        err = -ENODATA;
 454                if (err) {
 455                        kfree(name);
 456                        kfree(val);
 457                        kfree(*newxattr);
 458                        return err;
 459                }
 460                if (update_xattr < 0) {
 461                        if (xattr)
 462                                __remove_xattr(ci, xattr);
 463                        kfree(name);
 464                        kfree(*newxattr);
 465                        return 0;
 466                }
 467        }
 468
 469        if (!xattr) {
 470                new = 1;
 471                xattr = *newxattr;
 472                xattr->name = name;
 473                xattr->name_len = name_len;
 474                xattr->should_free_name = update_xattr;
 475
 476                ci->i_xattrs.count++;
 477                dout("__set_xattr count=%d\n", ci->i_xattrs.count);
 478        } else {
 479                kfree(*newxattr);
 480                *newxattr = NULL;
 481                if (xattr->should_free_val)
 482                        kfree((void *)xattr->val);
 483
 484                if (update_xattr) {
 485                        kfree((void *)name);
 486                        name = xattr->name;
 487                }
 488                ci->i_xattrs.names_size -= xattr->name_len;
 489                ci->i_xattrs.vals_size -= xattr->val_len;
 490        }
 491        ci->i_xattrs.names_size += name_len;
 492        ci->i_xattrs.vals_size += val_len;
 493        if (val)
 494                xattr->val = val;
 495        else
 496                xattr->val = "";
 497
 498        xattr->val_len = val_len;
 499        xattr->dirty = update_xattr;
 500        xattr->should_free_val = (val && update_xattr);
 501
 502        if (new) {
 503                rb_link_node(&xattr->node, parent, p);
 504                rb_insert_color(&xattr->node, &ci->i_xattrs.index);
 505                dout("__set_xattr_val p=%p\n", p);
 506        }
 507
 508        dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
 509             ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
 510
 511        return 0;
 512}
 513
 514static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
 515                           const char *name)
 516{
 517        struct rb_node **p;
 518        struct rb_node *parent = NULL;
 519        struct ceph_inode_xattr *xattr = NULL;
 520        int name_len = strlen(name);
 521        int c;
 522
 523        p = &ci->i_xattrs.index.rb_node;
 524        while (*p) {
 525                parent = *p;
 526                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 527                c = strncmp(name, xattr->name, xattr->name_len);
 528                if (c == 0 && name_len > xattr->name_len)
 529                        c = 1;
 530                if (c < 0)
 531                        p = &(*p)->rb_left;
 532                else if (c > 0)
 533                        p = &(*p)->rb_right;
 534                else {
 535                        dout("__get_xattr %s: found %.*s\n", name,
 536                             xattr->val_len, xattr->val);
 537                        return xattr;
 538                }
 539        }
 540
 541        dout("__get_xattr %s: not found\n", name);
 542
 543        return NULL;
 544}
 545
 546static void __free_xattr(struct ceph_inode_xattr *xattr)
 547{
 548        BUG_ON(!xattr);
 549
 550        if (xattr->should_free_name)
 551                kfree((void *)xattr->name);
 552        if (xattr->should_free_val)
 553                kfree((void *)xattr->val);
 554
 555        kfree(xattr);
 556}
 557
 558static int __remove_xattr(struct ceph_inode_info *ci,
 559                          struct ceph_inode_xattr *xattr)
 560{
 561        if (!xattr)
 562                return -ENODATA;
 563
 564        rb_erase(&xattr->node, &ci->i_xattrs.index);
 565
 566        if (xattr->should_free_name)
 567                kfree((void *)xattr->name);
 568        if (xattr->should_free_val)
 569                kfree((void *)xattr->val);
 570
 571        ci->i_xattrs.names_size -= xattr->name_len;
 572        ci->i_xattrs.vals_size -= xattr->val_len;
 573        ci->i_xattrs.count--;
 574        kfree(xattr);
 575
 576        return 0;
 577}
 578
 579static char *__copy_xattr_names(struct ceph_inode_info *ci,
 580                                char *dest)
 581{
 582        struct rb_node *p;
 583        struct ceph_inode_xattr *xattr = NULL;
 584
 585        p = rb_first(&ci->i_xattrs.index);
 586        dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
 587
 588        while (p) {
 589                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 590                memcpy(dest, xattr->name, xattr->name_len);
 591                dest[xattr->name_len] = '\0';
 592
 593                dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
 594                     xattr->name_len, ci->i_xattrs.names_size);
 595
 596                dest += xattr->name_len + 1;
 597                p = rb_next(p);
 598        }
 599
 600        return dest;
 601}
 602
 603void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 604{
 605        struct rb_node *p, *tmp;
 606        struct ceph_inode_xattr *xattr = NULL;
 607
 608        p = rb_first(&ci->i_xattrs.index);
 609
 610        dout("__ceph_destroy_xattrs p=%p\n", p);
 611
 612        while (p) {
 613                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 614                tmp = p;
 615                p = rb_next(tmp);
 616                dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
 617                     xattr->name_len, xattr->name);
 618                rb_erase(tmp, &ci->i_xattrs.index);
 619
 620                __free_xattr(xattr);
 621        }
 622
 623        ci->i_xattrs.names_size = 0;
 624        ci->i_xattrs.vals_size = 0;
 625        ci->i_xattrs.index_version = 0;
 626        ci->i_xattrs.count = 0;
 627        ci->i_xattrs.index = RB_ROOT;
 628}
 629
 630static int __build_xattrs(struct inode *inode)
 631        __releases(ci->i_ceph_lock)
 632        __acquires(ci->i_ceph_lock)
 633{
 634        u32 namelen;
 635        u32 numattr = 0;
 636        void *p, *end;
 637        u32 len;
 638        const char *name, *val;
 639        struct ceph_inode_info *ci = ceph_inode(inode);
 640        int xattr_version;
 641        struct ceph_inode_xattr **xattrs = NULL;
 642        int err = 0;
 643        int i;
 644
 645        dout("__build_xattrs() len=%d\n",
 646             ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
 647
 648        if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
 649                return 0; /* already built */
 650
 651        __ceph_destroy_xattrs(ci);
 652
 653start:
 654        /* updated internal xattr rb tree */
 655        if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
 656                p = ci->i_xattrs.blob->vec.iov_base;
 657                end = p + ci->i_xattrs.blob->vec.iov_len;
 658                ceph_decode_32_safe(&p, end, numattr, bad);
 659                xattr_version = ci->i_xattrs.version;
 660                spin_unlock(&ci->i_ceph_lock);
 661
 662                xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
 663                                 GFP_NOFS);
 664                err = -ENOMEM;
 665                if (!xattrs)
 666                        goto bad_lock;
 667
 668                for (i = 0; i < numattr; i++) {
 669                        xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
 670                                            GFP_NOFS);
 671                        if (!xattrs[i])
 672                                goto bad_lock;
 673                }
 674
 675                spin_lock(&ci->i_ceph_lock);
 676                if (ci->i_xattrs.version != xattr_version) {
 677                        /* lost a race, retry */
 678                        for (i = 0; i < numattr; i++)
 679                                kfree(xattrs[i]);
 680                        kfree(xattrs);
 681                        xattrs = NULL;
 682                        goto start;
 683                }
 684                err = -EIO;
 685                while (numattr--) {
 686                        ceph_decode_32_safe(&p, end, len, bad);
 687                        namelen = len;
 688                        name = p;
 689                        p += len;
 690                        ceph_decode_32_safe(&p, end, len, bad);
 691                        val = p;
 692                        p += len;
 693
 694                        err = __set_xattr(ci, name, namelen, val, len,
 695                                          0, 0, &xattrs[numattr]);
 696
 697                        if (err < 0)
 698                                goto bad;
 699                }
 700                kfree(xattrs);
 701        }
 702        ci->i_xattrs.index_version = ci->i_xattrs.version;
 703        ci->i_xattrs.dirty = false;
 704
 705        return err;
 706bad_lock:
 707        spin_lock(&ci->i_ceph_lock);
 708bad:
 709        if (xattrs) {
 710                for (i = 0; i < numattr; i++)
 711                        kfree(xattrs[i]);
 712                kfree(xattrs);
 713        }
 714        ci->i_xattrs.names_size = 0;
 715        return err;
 716}
 717
 718static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
 719                                    int val_size)
 720{
 721        /*
 722         * 4 bytes for the length, and additional 4 bytes per each xattr name,
 723         * 4 bytes per each value
 724         */
 725        int size = 4 + ci->i_xattrs.count*(4 + 4) +
 726                             ci->i_xattrs.names_size +
 727                             ci->i_xattrs.vals_size;
 728        dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
 729             ci->i_xattrs.count, ci->i_xattrs.names_size,
 730             ci->i_xattrs.vals_size);
 731
 732        if (name_size)
 733                size += 4 + 4 + name_size + val_size;
 734
 735        return size;
 736}
 737
 738/*
 739 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
 740 * and swap into place.
 741 */
 742void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
 743{
 744        struct rb_node *p;
 745        struct ceph_inode_xattr *xattr = NULL;
 746        void *dest;
 747
 748        dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
 749        if (ci->i_xattrs.dirty) {
 750                int need = __get_required_blob_size(ci, 0, 0);
 751
 752                BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
 753
 754                p = rb_first(&ci->i_xattrs.index);
 755                dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
 756
 757                ceph_encode_32(&dest, ci->i_xattrs.count);
 758                while (p) {
 759                        xattr = rb_entry(p, struct ceph_inode_xattr, node);
 760
 761                        ceph_encode_32(&dest, xattr->name_len);
 762                        memcpy(dest, xattr->name, xattr->name_len);
 763                        dest += xattr->name_len;
 764                        ceph_encode_32(&dest, xattr->val_len);
 765                        memcpy(dest, xattr->val, xattr->val_len);
 766                        dest += xattr->val_len;
 767
 768                        p = rb_next(p);
 769                }
 770
 771                /* adjust buffer len; it may be larger than we need */
 772                ci->i_xattrs.prealloc_blob->vec.iov_len =
 773                        dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
 774
 775                if (ci->i_xattrs.blob)
 776                        ceph_buffer_put(ci->i_xattrs.blob);
 777                ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
 778                ci->i_xattrs.prealloc_blob = NULL;
 779                ci->i_xattrs.dirty = false;
 780                ci->i_xattrs.version++;
 781        }
 782}
 783
 784static inline int __get_request_mask(struct inode *in) {
 785        struct ceph_mds_request *req = current->journal_info;
 786        int mask = 0;
 787        if (req && req->r_target_inode == in) {
 788                if (req->r_op == CEPH_MDS_OP_LOOKUP ||
 789                    req->r_op == CEPH_MDS_OP_LOOKUPINO ||
 790                    req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
 791                    req->r_op == CEPH_MDS_OP_GETATTR) {
 792                        mask = le32_to_cpu(req->r_args.getattr.mask);
 793                } else if (req->r_op == CEPH_MDS_OP_OPEN ||
 794                           req->r_op == CEPH_MDS_OP_CREATE) {
 795                        mask = le32_to_cpu(req->r_args.open.mask);
 796                }
 797        }
 798        return mask;
 799}
 800
 801ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 802                      size_t size)
 803{
 804        struct ceph_inode_info *ci = ceph_inode(inode);
 805        struct ceph_inode_xattr *xattr;
 806        struct ceph_vxattr *vxattr = NULL;
 807        int req_mask;
 808        int err;
 809
 810        /* let's see if a virtual xattr was requested */
 811        vxattr = ceph_match_vxattr(inode, name);
 812        if (vxattr) {
 813                int mask = 0;
 814                if (vxattr->flags & VXATTR_FLAG_RSTAT)
 815                        mask |= CEPH_STAT_RSTAT;
 816                err = ceph_do_getattr(inode, mask, true);
 817                if (err)
 818                        return err;
 819                err = -ENODATA;
 820                if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
 821                        err = vxattr->getxattr_cb(ci, value, size);
 822                return err;
 823        }
 824
 825        req_mask = __get_request_mask(inode);
 826
 827        spin_lock(&ci->i_ceph_lock);
 828        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
 829             ci->i_xattrs.version, ci->i_xattrs.index_version);
 830
 831        if (ci->i_xattrs.version == 0 ||
 832            !((req_mask & CEPH_CAP_XATTR_SHARED) ||
 833              __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
 834                spin_unlock(&ci->i_ceph_lock);
 835
 836                /* security module gets xattr while filling trace */
 837                if (current->journal_info) {
 838                        pr_warn_ratelimited("sync getxattr %p "
 839                                            "during filling trace\n", inode);
 840                        return -EBUSY;
 841                }
 842
 843                /* get xattrs from mds (if we don't already have them) */
 844                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 845                if (err)
 846                        return err;
 847                spin_lock(&ci->i_ceph_lock);
 848        }
 849
 850        err = __build_xattrs(inode);
 851        if (err < 0)
 852                goto out;
 853
 854        err = -ENODATA;  /* == ENOATTR */
 855        xattr = __get_xattr(ci, name);
 856        if (!xattr)
 857                goto out;
 858
 859        err = -ERANGE;
 860        if (size && size < xattr->val_len)
 861                goto out;
 862
 863        err = xattr->val_len;
 864        if (size == 0)
 865                goto out;
 866
 867        memcpy(value, xattr->val, xattr->val_len);
 868
 869        if (current->journal_info &&
 870            !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
 871                ci->i_ceph_flags |= CEPH_I_SEC_INITED;
 872out:
 873        spin_unlock(&ci->i_ceph_lock);
 874        return err;
 875}
 876
 877ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 878{
 879        struct inode *inode = d_inode(dentry);
 880        struct ceph_inode_info *ci = ceph_inode(inode);
 881        struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
 882        u32 vir_namelen = 0;
 883        u32 namelen;
 884        int err;
 885        u32 len;
 886        int i;
 887
 888        spin_lock(&ci->i_ceph_lock);
 889        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
 890             ci->i_xattrs.version, ci->i_xattrs.index_version);
 891
 892        if (ci->i_xattrs.version == 0 ||
 893            !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
 894                spin_unlock(&ci->i_ceph_lock);
 895                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 896                if (err)
 897                        return err;
 898                spin_lock(&ci->i_ceph_lock);
 899        }
 900
 901        err = __build_xattrs(inode);
 902        if (err < 0)
 903                goto out;
 904        /*
 905         * Start with virtual dir xattr names (if any) (including
 906         * terminating '\0' characters for each).
 907         */
 908        vir_namelen = ceph_vxattrs_name_size(vxattrs);
 909
 910        /* adding 1 byte per each variable due to the null termination */
 911        namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
 912        err = -ERANGE;
 913        if (size && vir_namelen + namelen > size)
 914                goto out;
 915
 916        err = namelen + vir_namelen;
 917        if (size == 0)
 918                goto out;
 919
 920        names = __copy_xattr_names(ci, names);
 921
 922        /* virtual xattr names, too */
 923        err = namelen;
 924        if (vxattrs) {
 925                for (i = 0; vxattrs[i].name; i++) {
 926                        if (!(vxattrs[i].flags & VXATTR_FLAG_HIDDEN) &&
 927                            !(vxattrs[i].exists_cb &&
 928                              !vxattrs[i].exists_cb(ci))) {
 929                                len = sprintf(names, "%s", vxattrs[i].name);
 930                                names += len + 1;
 931                                err += len + 1;
 932                        }
 933                }
 934        }
 935
 936out:
 937        spin_unlock(&ci->i_ceph_lock);
 938        return err;
 939}
 940
 941static int ceph_sync_setxattr(struct inode *inode, const char *name,
 942                              const char *value, size_t size, int flags)
 943{
 944        struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
 945        struct ceph_inode_info *ci = ceph_inode(inode);
 946        struct ceph_mds_request *req;
 947        struct ceph_mds_client *mdsc = fsc->mdsc;
 948        struct ceph_pagelist *pagelist = NULL;
 949        int op = CEPH_MDS_OP_SETXATTR;
 950        int err;
 951
 952        if (size > 0) {
 953                /* copy value into pagelist */
 954                pagelist = ceph_pagelist_alloc(GFP_NOFS);
 955                if (!pagelist)
 956                        return -ENOMEM;
 957
 958                err = ceph_pagelist_append(pagelist, value, size);
 959                if (err)
 960                        goto out;
 961        } else if (!value) {
 962                if (flags & CEPH_XATTR_REPLACE)
 963                        op = CEPH_MDS_OP_RMXATTR;
 964                else
 965                        flags |= CEPH_XATTR_REMOVE;
 966        }
 967
 968        dout("setxattr value=%.*s\n", (int)size, value);
 969
 970        /* do request */
 971        req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 972        if (IS_ERR(req)) {
 973                err = PTR_ERR(req);
 974                goto out;
 975        }
 976
 977        req->r_path2 = kstrdup(name, GFP_NOFS);
 978        if (!req->r_path2) {
 979                ceph_mdsc_put_request(req);
 980                err = -ENOMEM;
 981                goto out;
 982        }
 983
 984        if (op == CEPH_MDS_OP_SETXATTR) {
 985                req->r_args.setxattr.flags = cpu_to_le32(flags);
 986                req->r_pagelist = pagelist;
 987                pagelist = NULL;
 988        }
 989
 990        req->r_inode = inode;
 991        ihold(inode);
 992        req->r_num_caps = 1;
 993        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 994
 995        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
 996        err = ceph_mdsc_do_request(mdsc, NULL, req);
 997        ceph_mdsc_put_request(req);
 998        dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
 999
1000out:
1001        if (pagelist)
1002                ceph_pagelist_release(pagelist);
1003        return err;
1004}
1005
1006int __ceph_setxattr(struct inode *inode, const char *name,
1007                        const void *value, size_t size, int flags)
1008{
1009        struct ceph_vxattr *vxattr;
1010        struct ceph_inode_info *ci = ceph_inode(inode);
1011        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
1012        struct ceph_cap_flush *prealloc_cf = NULL;
1013        int issued;
1014        int err;
1015        int dirty = 0;
1016        int name_len = strlen(name);
1017        int val_len = size;
1018        char *newname = NULL;
1019        char *newval = NULL;
1020        struct ceph_inode_xattr *xattr = NULL;
1021        int required_blob_size;
1022        bool check_realm = false;
1023        bool lock_snap_rwsem = false;
1024
1025        if (ceph_snap(inode) != CEPH_NOSNAP)
1026                return -EROFS;
1027
1028        vxattr = ceph_match_vxattr(inode, name);
1029        if (vxattr) {
1030                if (vxattr->flags & VXATTR_FLAG_READONLY)
1031                        return -EOPNOTSUPP;
1032                if (value && !strncmp(vxattr->name, "ceph.quota", 10))
1033                        check_realm = true;
1034        }
1035
1036        /* pass any unhandled ceph.* xattrs through to the MDS */
1037        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1038                goto do_sync_unlocked;
1039
1040        /* preallocate memory for xattr name, value, index node */
1041        err = -ENOMEM;
1042        newname = kmemdup(name, name_len + 1, GFP_NOFS);
1043        if (!newname)
1044                goto out;
1045
1046        if (val_len) {
1047                newval = kmemdup(value, val_len, GFP_NOFS);
1048                if (!newval)
1049                        goto out;
1050        }
1051
1052        xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
1053        if (!xattr)
1054                goto out;
1055
1056        prealloc_cf = ceph_alloc_cap_flush();
1057        if (!prealloc_cf)
1058                goto out;
1059
1060        spin_lock(&ci->i_ceph_lock);
1061retry:
1062        issued = __ceph_caps_issued(ci, NULL);
1063        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1064                goto do_sync;
1065
1066        if (!lock_snap_rwsem && !ci->i_head_snapc) {
1067                lock_snap_rwsem = true;
1068                if (!down_read_trylock(&mdsc->snap_rwsem)) {
1069                        spin_unlock(&ci->i_ceph_lock);
1070                        down_read(&mdsc->snap_rwsem);
1071                        spin_lock(&ci->i_ceph_lock);
1072                        goto retry;
1073                }
1074        }
1075
1076        dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
1077        __build_xattrs(inode);
1078
1079        required_blob_size = __get_required_blob_size(ci, name_len, val_len);
1080
1081        if (!ci->i_xattrs.prealloc_blob ||
1082            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1083                struct ceph_buffer *blob;
1084
1085                spin_unlock(&ci->i_ceph_lock);
1086                dout(" preaallocating new blob size=%d\n", required_blob_size);
1087                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1088                if (!blob)
1089                        goto do_sync_unlocked;
1090                spin_lock(&ci->i_ceph_lock);
1091                if (ci->i_xattrs.prealloc_blob)
1092                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1093                ci->i_xattrs.prealloc_blob = blob;
1094                goto retry;
1095        }
1096
1097        err = __set_xattr(ci, newname, name_len, newval, val_len,
1098                          flags, value ? 1 : -1, &xattr);
1099
1100        if (!err) {
1101                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1102                                               &prealloc_cf);
1103                ci->i_xattrs.dirty = true;
1104                inode->i_ctime = current_time(inode);
1105        }
1106
1107        spin_unlock(&ci->i_ceph_lock);
1108        if (lock_snap_rwsem)
1109                up_read(&mdsc->snap_rwsem);
1110        if (dirty)
1111                __mark_inode_dirty(inode, dirty);
1112        ceph_free_cap_flush(prealloc_cf);
1113        return err;
1114
1115do_sync:
1116        spin_unlock(&ci->i_ceph_lock);
1117do_sync_unlocked:
1118        if (lock_snap_rwsem)
1119                up_read(&mdsc->snap_rwsem);
1120
1121        /* security module set xattr while filling trace */
1122        if (current->journal_info) {
1123                pr_warn_ratelimited("sync setxattr %p "
1124                                    "during filling trace\n", inode);
1125                err = -EBUSY;
1126        } else {
1127                err = ceph_sync_setxattr(inode, name, value, size, flags);
1128                if (err >= 0 && check_realm) {
1129                        /* check if snaprealm was created for quota inode */
1130                        spin_lock(&ci->i_ceph_lock);
1131                        if ((ci->i_max_files || ci->i_max_bytes) &&
1132                            !(ci->i_snap_realm &&
1133                              ci->i_snap_realm->ino == ci->i_vino.ino))
1134                                err = -EOPNOTSUPP;
1135                        spin_unlock(&ci->i_ceph_lock);
1136                }
1137        }
1138out:
1139        ceph_free_cap_flush(prealloc_cf);
1140        kfree(newname);
1141        kfree(newval);
1142        kfree(xattr);
1143        return err;
1144}
1145
1146static int ceph_get_xattr_handler(const struct xattr_handler *handler,
1147                                  struct dentry *dentry, struct inode *inode,
1148                                  const char *name, void *value, size_t size)
1149{
1150        if (!ceph_is_valid_xattr(name))
1151                return -EOPNOTSUPP;
1152        return __ceph_getxattr(inode, name, value, size);
1153}
1154
1155static int ceph_set_xattr_handler(const struct xattr_handler *handler,
1156                                  struct dentry *unused, struct inode *inode,
1157                                  const char *name, const void *value,
1158                                  size_t size, int flags)
1159{
1160        if (!ceph_is_valid_xattr(name))
1161                return -EOPNOTSUPP;
1162        return __ceph_setxattr(inode, name, value, size, flags);
1163}
1164
1165static const struct xattr_handler ceph_other_xattr_handler = {
1166        .prefix = "",  /* match any name => handlers called with full name */
1167        .get = ceph_get_xattr_handler,
1168        .set = ceph_set_xattr_handler,
1169};
1170
1171#ifdef CONFIG_SECURITY
1172bool ceph_security_xattr_wanted(struct inode *in)
1173{
1174        return in->i_security != NULL;
1175}
1176
1177bool ceph_security_xattr_deadlock(struct inode *in)
1178{
1179        struct ceph_inode_info *ci;
1180        bool ret;
1181        if (!in->i_security)
1182                return false;
1183        ci = ceph_inode(in);
1184        spin_lock(&ci->i_ceph_lock);
1185        ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
1186              !(ci->i_xattrs.version > 0 &&
1187                __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
1188        spin_unlock(&ci->i_ceph_lock);
1189        return ret;
1190}
1191#endif
1192