linux/fs/ceph/xattr.c
<<
>>
Prefs
   1#include <linux/ceph/ceph_debug.h>
   2#include <linux/ceph/pagelist.h>
   3
   4#include "super.h"
   5#include "mds_client.h"
   6
   7#include <linux/ceph/decode.h>
   8
   9#include <linux/xattr.h>
  10#include <linux/posix_acl_xattr.h>
  11#include <linux/slab.h>
  12
/* Prefix of the ceph-specific xattr namespace. */
#define XATTR_CEPH_PREFIX "ceph."
/* Length of that prefix, not counting the terminating '\0'. */
#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  15
  16static int __remove_xattr(struct ceph_inode_info *ci,
  17                          struct ceph_inode_xattr *xattr);
  18
/*
 * List of handlers for synthetic system.* attributes. Other
 * attributes are handled directly.
 *
 * The two POSIX ACL handlers are only compiled in when
 * CONFIG_CEPH_FS_POSIX_ACL is defined; the array is always
 * NULL-terminated.
 */
const struct xattr_handler *ceph_xattr_handlers[] = {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
        &posix_acl_access_xattr_handler,
        &posix_acl_default_xattr_handler,
#endif
        NULL,
};
  30
  31static bool ceph_is_valid_xattr(const char *name)
  32{
  33        return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  34               !strncmp(name, XATTR_SECURITY_PREFIX,
  35                        XATTR_SECURITY_PREFIX_LEN) ||
  36               !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
  37               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  38               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  39}
  40
  41/*
  42 * These define virtual xattrs exposing the recursive directory
  43 * statistics and layout metadata.
  44 */
  45struct ceph_vxattr {
  46        char *name;
  47        size_t name_size;       /* strlen(name) + 1 (for '\0') */
  48        size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  49                              size_t size);
  50        bool readonly, hidden;
  51        bool (*exists_cb)(struct ceph_inode_info *ci);
  52};
  53
  54/* layouts */
  55
  56static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  57{
  58        size_t s;
  59        char *p = (char *)&ci->i_layout;
  60
  61        for (s = 0; s < sizeof(ci->i_layout); s++, p++)
  62                if (*p)
  63                        return true;
  64        return false;
  65}
  66
  67static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  68                                   size_t size)
  69{
  70        int ret;
  71        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  72        struct ceph_osd_client *osdc = &fsc->client->osdc;
  73        s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  74        const char *pool_name;
  75        char buf[128];
  76
  77        dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  78        down_read(&osdc->map_sem);
  79        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  80        if (pool_name) {
  81                size_t len = strlen(pool_name);
  82                ret = snprintf(buf, sizeof(buf),
  83                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
  84                (unsigned long long)ceph_file_layout_su(ci->i_layout),
  85                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  86                (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
  87                if (!size) {
  88                        ret += len;
  89                } else if (ret + len > size) {
  90                        ret = -ERANGE;
  91                } else {
  92                        memcpy(val, buf, ret);
  93                        memcpy(val + ret, pool_name, len);
  94                        ret += len;
  95                }
  96        } else {
  97                ret = snprintf(buf, sizeof(buf),
  98                "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
  99                (unsigned long long)ceph_file_layout_su(ci->i_layout),
 100                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
 101                (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
 102                (unsigned long long)pool);
 103                if (size) {
 104                        if (ret <= size)
 105                                memcpy(val, buf, ret);
 106                        else
 107                                ret = -ERANGE;
 108                }
 109        }
 110        up_read(&osdc->map_sem);
 111        return ret;
 112}
 113
 114static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
 115                                               char *val, size_t size)
 116{
 117        return snprintf(val, size, "%lld",
 118                        (unsigned long long)ceph_file_layout_su(ci->i_layout));
 119}
 120
 121static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
 122                                                char *val, size_t size)
 123{
 124        return snprintf(val, size, "%lld",
 125               (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
 126}
 127
 128static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
 129                                               char *val, size_t size)
 130{
 131        return snprintf(val, size, "%lld",
 132               (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
 133}
 134
 135static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
 136                                        char *val, size_t size)
 137{
 138        int ret;
 139        struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
 140        struct ceph_osd_client *osdc = &fsc->client->osdc;
 141        s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
 142        const char *pool_name;
 143
 144        down_read(&osdc->map_sem);
 145        pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
 146        if (pool_name)
 147                ret = snprintf(val, size, "%s", pool_name);
 148        else
 149                ret = snprintf(val, size, "%lld", (unsigned long long)pool);
 150        up_read(&osdc->map_sem);
 151        return ret;
 152}
 153
 154/* directories */
 155
 156static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
 157                                        size_t size)
 158{
 159        return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
 160}
 161
 162static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
 163                                      size_t size)
 164{
 165        return snprintf(val, size, "%lld", ci->i_files);
 166}
 167
 168static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
 169                                        size_t size)
 170{
 171        return snprintf(val, size, "%lld", ci->i_subdirs);
 172}
 173
 174static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
 175                                         size_t size)
 176{
 177        return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
 178}
 179
 180static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
 181                                       size_t size)
 182{
 183        return snprintf(val, size, "%lld", ci->i_rfiles);
 184}
 185
 186static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
 187                                         size_t size)
 188{
 189        return snprintf(val, size, "%lld", ci->i_rsubdirs);
 190}
 191
 192static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
 193                                       size_t size)
 194{
 195        return snprintf(val, size, "%lld", ci->i_rbytes);
 196}
 197
 198static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 199                                       size_t size)
 200{
 201        return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
 202                        (long)ci->i_rctime.tv_nsec);
 203}
 204
 205
/* Build the string literal "ceph.<_type>.<_name>". */
#define CEPH_XATTR_NAME(_type, _name)   XATTR_CEPH_PREFIX #_type "." #_name
/* Build the string literal "ceph.<_type>.<_name>.<_name2>". */
#define CEPH_XATTR_NAME2(_type, _name, _name2)  \
        XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

/*
 * Table entry for a read-only, listed vxattr named ceph.<_type>.<_name>
 * whose value comes from ceph_vxattrcb_<_type>_<_name>().
 */
#define XATTR_NAME_CEPH(_type, _name)                                   \
        {                                                               \
                .name = CEPH_XATTR_NAME(_type, _name),                  \
                .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
                .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
                .readonly = true,                               \
                .hidden = false,                                \
                .exists_cb = NULL,                      \
        }
/*
 * Table entry for a hidden, writable vxattr named
 * ceph.<_type>.<_name>.<_field> whose value comes from
 * ceph_vxattrcb_<_name>_<_field>() and which exists only when
 * ceph_vxattrcb_layout_exists() says so.
 */
#define XATTR_LAYOUT_FIELD(_type, _name, _field)                        \
        {                                                               \
                .name = CEPH_XATTR_NAME2(_type, _name, _field), \
                .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
                .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
                .readonly = false,                              \
                .hidden = true,                 \
                .exists_cb = ceph_vxattrcb_layout_exists,       \
        }
 228
/*
 * Virtual xattrs offered on directories: the hidden layout entries
 * followed by the listed, read-only directory statistics.  Lookups
 * walk the table until the entry whose name is NULL.
 */
static struct ceph_vxattr ceph_dir_vxattrs[] = {
        {
                .name = "ceph.dir.layout",
                .name_size = sizeof("ceph.dir.layout"),
                .getxattr_cb = ceph_vxattrcb_layout,
                .readonly = false,
                .hidden = true,
                .exists_cb = ceph_vxattrcb_layout_exists,
        },
        XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
        XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
        XATTR_LAYOUT_FIELD(dir, layout, object_size),
        XATTR_LAYOUT_FIELD(dir, layout, pool),
        XATTR_NAME_CEPH(dir, entries),
        XATTR_NAME_CEPH(dir, files),
        XATTR_NAME_CEPH(dir, subdirs),
        XATTR_NAME_CEPH(dir, rentries),
        XATTR_NAME_CEPH(dir, rfiles),
        XATTR_NAME_CEPH(dir, rsubdirs),
        XATTR_NAME_CEPH(dir, rbytes),
        XATTR_NAME_CEPH(dir, rctime),
        { .name = NULL, 0 }     /* Required table terminator */
};
/* filled in by ceph_xattr_init(); counts non-hidden entries only */
static size_t ceph_dir_vxattrs_name_size;       /* total size of all names */
 253
 254/* files */
 255
/*
 * Virtual xattrs offered on regular files: only the layout entries,
 * all of them hidden.  Lookups walk the table until the entry whose
 * name is NULL.
 */
static struct ceph_vxattr ceph_file_vxattrs[] = {
        {
                .name = "ceph.file.layout",
                .name_size = sizeof("ceph.file.layout"),
                .getxattr_cb = ceph_vxattrcb_layout,
                .readonly = false,
                .hidden = true,
                .exists_cb = ceph_vxattrcb_layout_exists,
        },
        XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
        XATTR_LAYOUT_FIELD(file, layout, stripe_count),
        XATTR_LAYOUT_FIELD(file, layout, object_size),
        XATTR_LAYOUT_FIELD(file, layout, pool),
        { .name = NULL, 0 }     /* Required table terminator */
};
/* filled in by ceph_xattr_init(); counts non-hidden entries only */
static size_t ceph_file_vxattrs_name_size;      /* total size of all names */
 272
 273static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
 274{
 275        if (S_ISDIR(inode->i_mode))
 276                return ceph_dir_vxattrs;
 277        else if (S_ISREG(inode->i_mode))
 278                return ceph_file_vxattrs;
 279        return NULL;
 280}
 281
 282static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
 283{
 284        if (vxattrs == ceph_dir_vxattrs)
 285                return ceph_dir_vxattrs_name_size;
 286        if (vxattrs == ceph_file_vxattrs)
 287                return ceph_file_vxattrs_name_size;
 288        BUG_ON(vxattrs);
 289        return 0;
 290}
 291
 292/*
 293 * Compute the aggregate size (including terminating '\0') of all
 294 * virtual extended attribute names in the given vxattr table.
 295 */
 296static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
 297{
 298        struct ceph_vxattr *vxattr;
 299        size_t size = 0;
 300
 301        for (vxattr = vxattrs; vxattr->name; vxattr++)
 302                if (!vxattr->hidden)
 303                        size += vxattr->name_size;
 304
 305        return size;
 306}
 307
 308/* Routines called at initialization and exit time */
 309
 310void __init ceph_xattr_init(void)
 311{
 312        ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
 313        ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
 314}
 315
 316void ceph_xattr_exit(void)
 317{
 318        ceph_dir_vxattrs_name_size = 0;
 319        ceph_file_vxattrs_name_size = 0;
 320}
 321
 322static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
 323                                                const char *name)
 324{
 325        struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
 326
 327        if (vxattr) {
 328                while (vxattr->name) {
 329                        if (!strcmp(vxattr->name, name))
 330                                return vxattr;
 331                        vxattr++;
 332                }
 333        }
 334
 335        return NULL;
 336}
 337
/*
 * Insert or update one entry of the per-inode xattr rb tree
 * (ci->i_xattrs.index).
 *
 * @name/@name_len:  attribute name (length given explicitly)
 * @val/@val_len:    attribute value; a NULL @val is stored as ""
 * @flags:           XATTR_CREATE / XATTR_REPLACE, only honoured when
 *                   @update_xattr is non-zero
 * @update_xattr:    0   - plain insert/overwrite, nothing is freed or
 *                         marked dirty, @name/@val are not owned by
 *                         the entry
 *                   > 0 - set; the entry takes ownership of @name and
 *                         @val and is marked dirty
 *                   < 0 - remove the existing entry, if any
 * @newxattr:        preallocated node, used when no entry with this
 *                   name exists yet; freed and set to NULL when an
 *                   existing entry is updated instead
 *
 * Returns 0, or -EEXIST / -ENODATA when the XATTR_CREATE /
 * XATTR_REPLACE condition is violated (in which case @name and @val
 * are freed here).
 *
 * NOTE(review): on the error and removal returns *newxattr is left
 * untouched; presumably the caller releases it - verify.
 */
static int __set_xattr(struct ceph_inode_info *ci,
                           const char *name, int name_len,
                           const char *val, int val_len,
                           int flags, int update_xattr,
                           struct ceph_inode_xattr **newxattr)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct ceph_inode_xattr *xattr = NULL;
        int c;
        int new = 0;

        /*
         * Descend the tree.  Keys are ordered by the bytes of the common
         * prefix first and by length second.  On loop exit xattr is the
         * matching entry or NULL, and p/parent describe the link where a
         * new node would go.
         */
        p = &ci->i_xattrs.index.rb_node;
        while (*p) {
                parent = *p;
                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
                c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
                if (c < 0)
                        p = &(*p)->rb_left;
                else if (c > 0)
                        p = &(*p)->rb_right;
                else {
                        if (name_len == xattr->name_len)
                                break;
                        else if (name_len < xattr->name_len)
                                p = &(*p)->rb_left;
                        else
                                p = &(*p)->rb_right;
                }
                xattr = NULL;
        }

        if (update_xattr) {
                int err = 0;
                if (xattr && (flags & XATTR_CREATE))
                        err = -EEXIST;
                else if (!xattr && (flags & XATTR_REPLACE))
                        err = -ENODATA;
                if (err) {
                        kfree(name);
                        kfree(val);
                        return err;
                }
                /* negative update_xattr requests removal */
                if (update_xattr < 0) {
                        if (xattr)
                                __remove_xattr(ci, xattr);
                        kfree(name);
                        return 0;
                }
        }

        if (!xattr) {
                /* no entry yet: adopt the caller's preallocated node */
                new = 1;
                xattr = *newxattr;
                xattr->name = name;
                xattr->name_len = name_len;
                xattr->should_free_name = update_xattr;

                ci->i_xattrs.count++;
                dout("__set_xattr count=%d\n", ci->i_xattrs.count);
        } else {
                /* reuse the existing entry; the spare node is not needed */
                kfree(*newxattr);
                *newxattr = NULL;
                if (xattr->should_free_val)
                        kfree((void *)xattr->val);

                if (update_xattr) {
                        /* keep the name already stored in the entry */
                        kfree((void *)name);
                        name = xattr->name;
                }
                /* take the old sizes out before the new ones are added */
                ci->i_xattrs.names_size -= xattr->name_len;
                ci->i_xattrs.vals_size -= xattr->val_len;
        }
        ci->i_xattrs.names_size += name_len;
        ci->i_xattrs.vals_size += val_len;
        if (val)
                xattr->val = val;
        else
                xattr->val = "";

        xattr->val_len = val_len;
        xattr->dirty = update_xattr;
        xattr->should_free_val = (val && update_xattr);

        if (new) {
                rb_link_node(&xattr->node, parent, p);
                rb_insert_color(&xattr->node, &ci->i_xattrs.index);
                dout("__set_xattr_val p=%p\n", p);
        }

        dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
             ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);

        return 0;
}
 433
 434static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
 435                           const char *name)
 436{
 437        struct rb_node **p;
 438        struct rb_node *parent = NULL;
 439        struct ceph_inode_xattr *xattr = NULL;
 440        int name_len = strlen(name);
 441        int c;
 442
 443        p = &ci->i_xattrs.index.rb_node;
 444        while (*p) {
 445                parent = *p;
 446                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 447                c = strncmp(name, xattr->name, xattr->name_len);
 448                if (c == 0 && name_len > xattr->name_len)
 449                        c = 1;
 450                if (c < 0)
 451                        p = &(*p)->rb_left;
 452                else if (c > 0)
 453                        p = &(*p)->rb_right;
 454                else {
 455                        dout("__get_xattr %s: found %.*s\n", name,
 456                             xattr->val_len, xattr->val);
 457                        return xattr;
 458                }
 459        }
 460
 461        dout("__get_xattr %s: not found\n", name);
 462
 463        return NULL;
 464}
 465
 466static void __free_xattr(struct ceph_inode_xattr *xattr)
 467{
 468        BUG_ON(!xattr);
 469
 470        if (xattr->should_free_name)
 471                kfree((void *)xattr->name);
 472        if (xattr->should_free_val)
 473                kfree((void *)xattr->val);
 474
 475        kfree(xattr);
 476}
 477
 478static int __remove_xattr(struct ceph_inode_info *ci,
 479                          struct ceph_inode_xattr *xattr)
 480{
 481        if (!xattr)
 482                return -ENODATA;
 483
 484        rb_erase(&xattr->node, &ci->i_xattrs.index);
 485
 486        if (xattr->should_free_name)
 487                kfree((void *)xattr->name);
 488        if (xattr->should_free_val)
 489                kfree((void *)xattr->val);
 490
 491        ci->i_xattrs.names_size -= xattr->name_len;
 492        ci->i_xattrs.vals_size -= xattr->val_len;
 493        ci->i_xattrs.count--;
 494        kfree(xattr);
 495
 496        return 0;
 497}
 498
 499static int __remove_xattr_by_name(struct ceph_inode_info *ci,
 500                           const char *name)
 501{
 502        struct rb_node **p;
 503        struct ceph_inode_xattr *xattr;
 504        int err;
 505
 506        p = &ci->i_xattrs.index.rb_node;
 507        xattr = __get_xattr(ci, name);
 508        err = __remove_xattr(ci, xattr);
 509        return err;
 510}
 511
 512static char *__copy_xattr_names(struct ceph_inode_info *ci,
 513                                char *dest)
 514{
 515        struct rb_node *p;
 516        struct ceph_inode_xattr *xattr = NULL;
 517
 518        p = rb_first(&ci->i_xattrs.index);
 519        dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
 520
 521        while (p) {
 522                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 523                memcpy(dest, xattr->name, xattr->name_len);
 524                dest[xattr->name_len] = '\0';
 525
 526                dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
 527                     xattr->name_len, ci->i_xattrs.names_size);
 528
 529                dest += xattr->name_len + 1;
 530                p = rb_next(p);
 531        }
 532
 533        return dest;
 534}
 535
 536void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 537{
 538        struct rb_node *p, *tmp;
 539        struct ceph_inode_xattr *xattr = NULL;
 540
 541        p = rb_first(&ci->i_xattrs.index);
 542
 543        dout("__ceph_destroy_xattrs p=%p\n", p);
 544
 545        while (p) {
 546                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 547                tmp = p;
 548                p = rb_next(tmp);
 549                dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
 550                     xattr->name_len, xattr->name);
 551                rb_erase(tmp, &ci->i_xattrs.index);
 552
 553                __free_xattr(xattr);
 554        }
 555
 556        ci->i_xattrs.names_size = 0;
 557        ci->i_xattrs.vals_size = 0;
 558        ci->i_xattrs.index_version = 0;
 559        ci->i_xattrs.count = 0;
 560        ci->i_xattrs.index = RB_ROOT;
 561}
 562
 563static int __build_xattrs(struct inode *inode)
 564        __releases(ci->i_ceph_lock)
 565        __acquires(ci->i_ceph_lock)
 566{
 567        u32 namelen;
 568        u32 numattr = 0;
 569        void *p, *end;
 570        u32 len;
 571        const char *name, *val;
 572        struct ceph_inode_info *ci = ceph_inode(inode);
 573        int xattr_version;
 574        struct ceph_inode_xattr **xattrs = NULL;
 575        int err = 0;
 576        int i;
 577
 578        dout("__build_xattrs() len=%d\n",
 579             ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
 580
 581        if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
 582                return 0; /* already built */
 583
 584        __ceph_destroy_xattrs(ci);
 585
 586start:
 587        /* updated internal xattr rb tree */
 588        if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
 589                p = ci->i_xattrs.blob->vec.iov_base;
 590                end = p + ci->i_xattrs.blob->vec.iov_len;
 591                ceph_decode_32_safe(&p, end, numattr, bad);
 592                xattr_version = ci->i_xattrs.version;
 593                spin_unlock(&ci->i_ceph_lock);
 594
 595                xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
 596                                 GFP_NOFS);
 597                err = -ENOMEM;
 598                if (!xattrs)
 599                        goto bad_lock;
 600
 601                for (i = 0; i < numattr; i++) {
 602                        xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
 603                                            GFP_NOFS);
 604                        if (!xattrs[i])
 605                                goto bad_lock;
 606                }
 607
 608                spin_lock(&ci->i_ceph_lock);
 609                if (ci->i_xattrs.version != xattr_version) {
 610                        /* lost a race, retry */
 611                        for (i = 0; i < numattr; i++)
 612                                kfree(xattrs[i]);
 613                        kfree(xattrs);
 614                        xattrs = NULL;
 615                        goto start;
 616                }
 617                err = -EIO;
 618                while (numattr--) {
 619                        ceph_decode_32_safe(&p, end, len, bad);
 620                        namelen = len;
 621                        name = p;
 622                        p += len;
 623                        ceph_decode_32_safe(&p, end, len, bad);
 624                        val = p;
 625                        p += len;
 626
 627                        err = __set_xattr(ci, name, namelen, val, len,
 628                                          0, 0, &xattrs[numattr]);
 629
 630                        if (err < 0)
 631                                goto bad;
 632                }
 633                kfree(xattrs);
 634        }
 635        ci->i_xattrs.index_version = ci->i_xattrs.version;
 636        ci->i_xattrs.dirty = false;
 637
 638        return err;
 639bad_lock:
 640        spin_lock(&ci->i_ceph_lock);
 641bad:
 642        if (xattrs) {
 643                for (i = 0; i < numattr; i++)
 644                        kfree(xattrs[i]);
 645                kfree(xattrs);
 646        }
 647        ci->i_xattrs.names_size = 0;
 648        return err;
 649}
 650
 651static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
 652                                    int val_size)
 653{
 654        /*
 655         * 4 bytes for the length, and additional 4 bytes per each xattr name,
 656         * 4 bytes per each value
 657         */
 658        int size = 4 + ci->i_xattrs.count*(4 + 4) +
 659                             ci->i_xattrs.names_size +
 660                             ci->i_xattrs.vals_size;
 661        dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
 662             ci->i_xattrs.count, ci->i_xattrs.names_size,
 663             ci->i_xattrs.vals_size);
 664
 665        if (name_size)
 666                size += 4 + 4 + name_size + val_size;
 667
 668        return size;
 669}
 670
 671/*
 672 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
 673 * and swap into place.
 674 */
 675void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
 676{
 677        struct rb_node *p;
 678        struct ceph_inode_xattr *xattr = NULL;
 679        void *dest;
 680
 681        dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
 682        if (ci->i_xattrs.dirty) {
 683                int need = __get_required_blob_size(ci, 0, 0);
 684
 685                BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
 686
 687                p = rb_first(&ci->i_xattrs.index);
 688                dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
 689
 690                ceph_encode_32(&dest, ci->i_xattrs.count);
 691                while (p) {
 692                        xattr = rb_entry(p, struct ceph_inode_xattr, node);
 693
 694                        ceph_encode_32(&dest, xattr->name_len);
 695                        memcpy(dest, xattr->name, xattr->name_len);
 696                        dest += xattr->name_len;
 697                        ceph_encode_32(&dest, xattr->val_len);
 698                        memcpy(dest, xattr->val, xattr->val_len);
 699                        dest += xattr->val_len;
 700
 701                        p = rb_next(p);
 702                }
 703
 704                /* adjust buffer len; it may be larger than we need */
 705                ci->i_xattrs.prealloc_blob->vec.iov_len =
 706                        dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
 707
 708                if (ci->i_xattrs.blob)
 709                        ceph_buffer_put(ci->i_xattrs.blob);
 710                ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
 711                ci->i_xattrs.prealloc_blob = NULL;
 712                ci->i_xattrs.dirty = false;
 713                ci->i_xattrs.version++;
 714        }
 715}
 716
 717static inline int __get_request_mask(struct inode *in) {
 718        struct ceph_mds_request *req = current->journal_info;
 719        int mask = 0;
 720        if (req && req->r_target_inode == in) {
 721                if (req->r_op == CEPH_MDS_OP_LOOKUP ||
 722                    req->r_op == CEPH_MDS_OP_LOOKUPINO ||
 723                    req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
 724                    req->r_op == CEPH_MDS_OP_GETATTR) {
 725                        mask = le32_to_cpu(req->r_args.getattr.mask);
 726                } else if (req->r_op == CEPH_MDS_OP_OPEN ||
 727                           req->r_op == CEPH_MDS_OP_CREATE) {
 728                        mask = le32_to_cpu(req->r_args.open.mask);
 729                }
 730        }
 731        return mask;
 732}
 733
 734ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 735                      size_t size)
 736{
 737        struct ceph_inode_info *ci = ceph_inode(inode);
 738        struct ceph_inode_xattr *xattr;
 739        struct ceph_vxattr *vxattr = NULL;
 740        int req_mask;
 741        int err;
 742
 743        if (!ceph_is_valid_xattr(name))
 744                return -ENODATA;
 745
 746        /* let's see if a virtual xattr was requested */
 747        vxattr = ceph_match_vxattr(inode, name);
 748        if (vxattr) {
 749                err = -ENODATA;
 750                if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
 751                        err = vxattr->getxattr_cb(ci, value, size);
 752                return err;
 753        }
 754
 755        req_mask = __get_request_mask(inode);
 756
 757        spin_lock(&ci->i_ceph_lock);
 758        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
 759             ci->i_xattrs.version, ci->i_xattrs.index_version);
 760
 761        if (ci->i_xattrs.version == 0 ||
 762            !((req_mask & CEPH_CAP_XATTR_SHARED) ||
 763              __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
 764                spin_unlock(&ci->i_ceph_lock);
 765
 766                /* security module gets xattr while filling trace */
 767                if (current->journal_info != NULL) {
 768                        pr_warn_ratelimited("sync getxattr %p "
 769                                            "during filling trace\n", inode);
 770                        return -EBUSY;
 771                }
 772
 773                /* get xattrs from mds (if we don't already have them) */
 774                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 775                if (err)
 776                        return err;
 777                spin_lock(&ci->i_ceph_lock);
 778        }
 779
 780        err = __build_xattrs(inode);
 781        if (err < 0)
 782                goto out;
 783
 784        err = -ENODATA;  /* == ENOATTR */
 785        xattr = __get_xattr(ci, name);
 786        if (!xattr)
 787                goto out;
 788
 789        err = -ERANGE;
 790        if (size && size < xattr->val_len)
 791                goto out;
 792
 793        err = xattr->val_len;
 794        if (size == 0)
 795                goto out;
 796
 797        memcpy(value, xattr->val, xattr->val_len);
 798
 799        if (current->journal_info != NULL &&
 800            !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
 801                ci->i_ceph_flags |= CEPH_I_SEC_INITED;
 802out:
 803        spin_unlock(&ci->i_ceph_lock);
 804        return err;
 805}
 806
 807ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
 808                      size_t size)
 809{
 810        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 811                return generic_getxattr(dentry, name, value, size);
 812
 813        return __ceph_getxattr(d_inode(dentry), name, value, size);
 814}
 815
 816ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 817{
 818        struct inode *inode = d_inode(dentry);
 819        struct ceph_inode_info *ci = ceph_inode(inode);
 820        struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
 821        u32 vir_namelen = 0;
 822        u32 namelen;
 823        int err;
 824        u32 len;
 825        int i;
 826
 827        spin_lock(&ci->i_ceph_lock);
 828        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
 829             ci->i_xattrs.version, ci->i_xattrs.index_version);
 830
 831        if (ci->i_xattrs.version == 0 ||
 832            !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
 833                spin_unlock(&ci->i_ceph_lock);
 834                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
 835                if (err)
 836                        return err;
 837                spin_lock(&ci->i_ceph_lock);
 838        }
 839
 840        err = __build_xattrs(inode);
 841        if (err < 0)
 842                goto out;
 843        /*
 844         * Start with virtual dir xattr names (if any) (including
 845         * terminating '\0' characters for each).
 846         */
 847        vir_namelen = ceph_vxattrs_name_size(vxattrs);
 848
 849        /* adding 1 byte per each variable due to the null termination */
 850        namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
 851        err = -ERANGE;
 852        if (size && vir_namelen + namelen > size)
 853                goto out;
 854
 855        err = namelen + vir_namelen;
 856        if (size == 0)
 857                goto out;
 858
 859        names = __copy_xattr_names(ci, names);
 860
 861        /* virtual xattr names, too */
 862        err = namelen;
 863        if (vxattrs) {
 864                for (i = 0; vxattrs[i].name; i++) {
 865                        if (!vxattrs[i].hidden &&
 866                            !(vxattrs[i].exists_cb &&
 867                              !vxattrs[i].exists_cb(ci))) {
 868                                len = sprintf(names, "%s", vxattrs[i].name);
 869                                names += len + 1;
 870                                err += len + 1;
 871                        }
 872                }
 873        }
 874
 875out:
 876        spin_unlock(&ci->i_ceph_lock);
 877        return err;
 878}
 879
 880static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 881                              const char *value, size_t size, int flags)
 882{
 883        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 884        struct inode *inode = d_inode(dentry);
 885        struct ceph_inode_info *ci = ceph_inode(inode);
 886        struct ceph_mds_request *req;
 887        struct ceph_mds_client *mdsc = fsc->mdsc;
 888        struct ceph_pagelist *pagelist = NULL;
 889        int err;
 890
 891        if (size > 0) {
 892                /* copy value into pagelist */
 893                pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
 894                if (!pagelist)
 895                        return -ENOMEM;
 896
 897                ceph_pagelist_init(pagelist);
 898                err = ceph_pagelist_append(pagelist, value, size);
 899                if (err)
 900                        goto out;
 901        } else if (!value) {
 902                flags |= CEPH_XATTR_REMOVE;
 903        }
 904
 905        dout("setxattr value=%.*s\n", (int)size, value);
 906
 907        /* do request */
 908        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
 909                                       USE_AUTH_MDS);
 910        if (IS_ERR(req)) {
 911                err = PTR_ERR(req);
 912                goto out;
 913        }
 914
 915        req->r_args.setxattr.flags = cpu_to_le32(flags);
 916        req->r_path2 = kstrdup(name, GFP_NOFS);
 917        if (!req->r_path2) {
 918                ceph_mdsc_put_request(req);
 919                err = -ENOMEM;
 920                goto out;
 921        }
 922
 923        req->r_pagelist = pagelist;
 924        pagelist = NULL;
 925
 926        req->r_inode = inode;
 927        ihold(inode);
 928        req->r_num_caps = 1;
 929        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 930
 931        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
 932        err = ceph_mdsc_do_request(mdsc, NULL, req);
 933        ceph_mdsc_put_request(req);
 934        dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
 935
 936out:
 937        if (pagelist)
 938                ceph_pagelist_release(pagelist);
 939        return err;
 940}
 941
 942int __ceph_setxattr(struct dentry *dentry, const char *name,
 943                        const void *value, size_t size, int flags)
 944{
 945        struct inode *inode = d_inode(dentry);
 946        struct ceph_vxattr *vxattr;
 947        struct ceph_inode_info *ci = ceph_inode(inode);
 948        struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
 949        struct ceph_cap_flush *prealloc_cf = NULL;
 950        int issued;
 951        int err;
 952        int dirty = 0;
 953        int name_len = strlen(name);
 954        int val_len = size;
 955        char *newname = NULL;
 956        char *newval = NULL;
 957        struct ceph_inode_xattr *xattr = NULL;
 958        int required_blob_size;
 959        bool lock_snap_rwsem = false;
 960
 961        if (!ceph_is_valid_xattr(name))
 962                return -EOPNOTSUPP;
 963
 964        vxattr = ceph_match_vxattr(inode, name);
 965        if (vxattr && vxattr->readonly)
 966                return -EOPNOTSUPP;
 967
 968        /* pass any unhandled ceph.* xattrs through to the MDS */
 969        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
 970                goto do_sync_unlocked;
 971
 972        /* preallocate memory for xattr name, value, index node */
 973        err = -ENOMEM;
 974        newname = kmemdup(name, name_len + 1, GFP_NOFS);
 975        if (!newname)
 976                goto out;
 977
 978        if (val_len) {
 979                newval = kmemdup(value, val_len, GFP_NOFS);
 980                if (!newval)
 981                        goto out;
 982        }
 983
 984        xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
 985        if (!xattr)
 986                goto out;
 987
 988        prealloc_cf = ceph_alloc_cap_flush();
 989        if (!prealloc_cf)
 990                goto out;
 991
 992        spin_lock(&ci->i_ceph_lock);
 993retry:
 994        issued = __ceph_caps_issued(ci, NULL);
 995        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
 996                goto do_sync;
 997
 998        if (!lock_snap_rwsem && !ci->i_head_snapc) {
 999                lock_snap_rwsem = true;
1000                if (!down_read_trylock(&mdsc->snap_rwsem)) {
1001                        spin_unlock(&ci->i_ceph_lock);
1002                        down_read(&mdsc->snap_rwsem);
1003                        spin_lock(&ci->i_ceph_lock);
1004                        goto retry;
1005                }
1006        }
1007
1008        dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
1009        __build_xattrs(inode);
1010
1011        required_blob_size = __get_required_blob_size(ci, name_len, val_len);
1012
1013        if (!ci->i_xattrs.prealloc_blob ||
1014            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1015                struct ceph_buffer *blob;
1016
1017                spin_unlock(&ci->i_ceph_lock);
1018                dout(" preaallocating new blob size=%d\n", required_blob_size);
1019                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1020                if (!blob)
1021                        goto do_sync_unlocked;
1022                spin_lock(&ci->i_ceph_lock);
1023                if (ci->i_xattrs.prealloc_blob)
1024                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1025                ci->i_xattrs.prealloc_blob = blob;
1026                goto retry;
1027        }
1028
1029        err = __set_xattr(ci, newname, name_len, newval, val_len,
1030                          flags, value ? 1 : -1, &xattr);
1031
1032        if (!err) {
1033                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1034                                               &prealloc_cf);
1035                ci->i_xattrs.dirty = true;
1036                inode->i_ctime = current_fs_time(inode->i_sb);
1037        }
1038
1039        spin_unlock(&ci->i_ceph_lock);
1040        if (lock_snap_rwsem)
1041                up_read(&mdsc->snap_rwsem);
1042        if (dirty)
1043                __mark_inode_dirty(inode, dirty);
1044        ceph_free_cap_flush(prealloc_cf);
1045        return err;
1046
1047do_sync:
1048        spin_unlock(&ci->i_ceph_lock);
1049do_sync_unlocked:
1050        if (lock_snap_rwsem)
1051                up_read(&mdsc->snap_rwsem);
1052
1053        /* security module set xattr while filling trace */
1054        if (current->journal_info != NULL) {
1055                pr_warn_ratelimited("sync setxattr %p "
1056                                    "during filling trace\n", inode);
1057                err = -EBUSY;
1058        } else {
1059                err = ceph_sync_setxattr(dentry, name, value, size, flags);
1060        }
1061out:
1062        ceph_free_cap_flush(prealloc_cf);
1063        kfree(newname);
1064        kfree(newval);
1065        kfree(xattr);
1066        return err;
1067}
1068
1069int ceph_setxattr(struct dentry *dentry, const char *name,
1070                  const void *value, size_t size, int flags)
1071{
1072        if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
1073                return -EROFS;
1074
1075        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1076                return generic_setxattr(dentry, name, value, size, flags);
1077
1078        if (size == 0)
1079                value = "";  /* empty EA, do not remove */
1080
1081        return __ceph_setxattr(dentry, name, value, size, flags);
1082}
1083
1084static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1085{
1086        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
1087        struct ceph_mds_client *mdsc = fsc->mdsc;
1088        struct inode *inode = d_inode(dentry);
1089        struct ceph_mds_request *req;
1090        int err;
1091
1092        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
1093                                       USE_AUTH_MDS);
1094        if (IS_ERR(req))
1095                return PTR_ERR(req);
1096        req->r_path2 = kstrdup(name, GFP_NOFS);
1097        if (!req->r_path2)
1098                return -ENOMEM;
1099
1100        req->r_inode = inode;
1101        ihold(inode);
1102        req->r_num_caps = 1;
1103        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1104        err = ceph_mdsc_do_request(mdsc, NULL, req);
1105        ceph_mdsc_put_request(req);
1106        return err;
1107}
1108
1109int __ceph_removexattr(struct dentry *dentry, const char *name)
1110{
1111        struct inode *inode = d_inode(dentry);
1112        struct ceph_vxattr *vxattr;
1113        struct ceph_inode_info *ci = ceph_inode(inode);
1114        struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
1115        struct ceph_cap_flush *prealloc_cf = NULL;
1116        int issued;
1117        int err;
1118        int required_blob_size;
1119        int dirty;
1120        bool lock_snap_rwsem = false;
1121
1122        if (!ceph_is_valid_xattr(name))
1123                return -EOPNOTSUPP;
1124
1125        vxattr = ceph_match_vxattr(inode, name);
1126        if (vxattr && vxattr->readonly)
1127                return -EOPNOTSUPP;
1128
1129        /* pass any unhandled ceph.* xattrs through to the MDS */
1130        if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1131                goto do_sync_unlocked;
1132
1133        prealloc_cf = ceph_alloc_cap_flush();
1134        if (!prealloc_cf)
1135                return -ENOMEM;
1136
1137        err = -ENOMEM;
1138        spin_lock(&ci->i_ceph_lock);
1139retry:
1140        issued = __ceph_caps_issued(ci, NULL);
1141        if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1142                goto do_sync;
1143
1144        if (!lock_snap_rwsem && !ci->i_head_snapc) {
1145                lock_snap_rwsem = true;
1146                if (!down_read_trylock(&mdsc->snap_rwsem)) {
1147                        spin_unlock(&ci->i_ceph_lock);
1148                        down_read(&mdsc->snap_rwsem);
1149                        spin_lock(&ci->i_ceph_lock);
1150                        goto retry;
1151                }
1152        }
1153
1154        dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
1155
1156        __build_xattrs(inode);
1157
1158        required_blob_size = __get_required_blob_size(ci, 0, 0);
1159
1160        if (!ci->i_xattrs.prealloc_blob ||
1161            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1162                struct ceph_buffer *blob;
1163
1164                spin_unlock(&ci->i_ceph_lock);
1165                dout(" preaallocating new blob size=%d\n", required_blob_size);
1166                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1167                if (!blob)
1168                        goto do_sync_unlocked;
1169                spin_lock(&ci->i_ceph_lock);
1170                if (ci->i_xattrs.prealloc_blob)
1171                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1172                ci->i_xattrs.prealloc_blob = blob;
1173                goto retry;
1174        }
1175
1176        err = __remove_xattr_by_name(ceph_inode(inode), name);
1177
1178        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1179                                       &prealloc_cf);
1180        ci->i_xattrs.dirty = true;
1181        inode->i_ctime = current_fs_time(inode->i_sb);
1182        spin_unlock(&ci->i_ceph_lock);
1183        if (lock_snap_rwsem)
1184                up_read(&mdsc->snap_rwsem);
1185        if (dirty)
1186                __mark_inode_dirty(inode, dirty);
1187        ceph_free_cap_flush(prealloc_cf);
1188        return err;
1189do_sync:
1190        spin_unlock(&ci->i_ceph_lock);
1191do_sync_unlocked:
1192        if (lock_snap_rwsem)
1193                up_read(&mdsc->snap_rwsem);
1194        ceph_free_cap_flush(prealloc_cf);
1195        err = ceph_send_removexattr(dentry, name);
1196        return err;
1197}
1198
1199int ceph_removexattr(struct dentry *dentry, const char *name)
1200{
1201        if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
1202                return -EROFS;
1203
1204        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1205                return generic_removexattr(dentry, name);
1206
1207        return __ceph_removexattr(dentry, name);
1208}
1209
1210#ifdef CONFIG_SECURITY
1211bool ceph_security_xattr_wanted(struct inode *in)
1212{
1213        return in->i_security != NULL;
1214}
1215
1216bool ceph_security_xattr_deadlock(struct inode *in)
1217{
1218        struct ceph_inode_info *ci;
1219        bool ret;
1220        if (in->i_security == NULL)
1221                return false;
1222        ci = ceph_inode(in);
1223        spin_lock(&ci->i_ceph_lock);
1224        ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
1225              !(ci->i_xattrs.version > 0 &&
1226                __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
1227        spin_unlock(&ci->i_ceph_lock);
1228        return ret;
1229}
1230#endif
1231