linux/fs/ceph/xattr.c
<<
>>
Prefs
   1#include <linux/ceph/ceph_debug.h>
   2
   3#include "super.h"
   4#include "mds_client.h"
   5
   6#include <linux/ceph/decode.h>
   7
   8#include <linux/xattr.h>
   9#include <linux/slab.h>
  10
  11static bool ceph_is_valid_xattr(const char *name)
  12{
  13        return !strncmp(name, "ceph.", 5) ||
  14               !strncmp(name, XATTR_SECURITY_PREFIX,
  15                        XATTR_SECURITY_PREFIX_LEN) ||
  16               !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  17               !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  18}
  19
  20/*
  21 * These define virtual xattrs exposing the recursive directory
  22 * statistics and layout metadata.
  23 */
  24struct ceph_vxattr_cb {
  25        bool readonly;
  26        char *name;
  27        size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  28                              size_t size);
  29};
  30
  31/* directories */
  32
  33static size_t ceph_vxattrcb_entries(struct ceph_inode_info *ci, char *val,
  34                                        size_t size)
  35{
  36        return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
  37}
  38
  39static size_t ceph_vxattrcb_files(struct ceph_inode_info *ci, char *val,
  40                                      size_t size)
  41{
  42        return snprintf(val, size, "%lld", ci->i_files);
  43}
  44
  45static size_t ceph_vxattrcb_subdirs(struct ceph_inode_info *ci, char *val,
  46                                        size_t size)
  47{
  48        return snprintf(val, size, "%lld", ci->i_subdirs);
  49}
  50
  51static size_t ceph_vxattrcb_rentries(struct ceph_inode_info *ci, char *val,
  52                                         size_t size)
  53{
  54        return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
  55}
  56
  57static size_t ceph_vxattrcb_rfiles(struct ceph_inode_info *ci, char *val,
  58                                       size_t size)
  59{
  60        return snprintf(val, size, "%lld", ci->i_rfiles);
  61}
  62
  63static size_t ceph_vxattrcb_rsubdirs(struct ceph_inode_info *ci, char *val,
  64                                         size_t size)
  65{
  66        return snprintf(val, size, "%lld", ci->i_rsubdirs);
  67}
  68
  69static size_t ceph_vxattrcb_rbytes(struct ceph_inode_info *ci, char *val,
  70                                       size_t size)
  71{
  72        return snprintf(val, size, "%lld", ci->i_rbytes);
  73}
  74
  75static size_t ceph_vxattrcb_rctime(struct ceph_inode_info *ci, char *val,
  76                                       size_t size)
  77{
  78        return snprintf(val, size, "%ld.%ld", (long)ci->i_rctime.tv_sec,
  79                        (long)ci->i_rctime.tv_nsec);
  80}
  81
  82static struct ceph_vxattr_cb ceph_dir_vxattrs[] = {
  83        { true, "ceph.dir.entries", ceph_vxattrcb_entries},
  84        { true, "ceph.dir.files", ceph_vxattrcb_files},
  85        { true, "ceph.dir.subdirs", ceph_vxattrcb_subdirs},
  86        { true, "ceph.dir.rentries", ceph_vxattrcb_rentries},
  87        { true, "ceph.dir.rfiles", ceph_vxattrcb_rfiles},
  88        { true, "ceph.dir.rsubdirs", ceph_vxattrcb_rsubdirs},
  89        { true, "ceph.dir.rbytes", ceph_vxattrcb_rbytes},
  90        { true, "ceph.dir.rctime", ceph_vxattrcb_rctime},
  91        { true, NULL, NULL }
  92};
  93
  94/* files */
  95
  96static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  97                                   size_t size)
  98{
  99        int ret;
 100
 101        ret = snprintf(val, size,
 102                "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n",
 103                (unsigned long long)ceph_file_layout_su(ci->i_layout),
 104                (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
 105                (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
 106        if (ceph_file_layout_pg_preferred(ci->i_layout))
 107                ret += snprintf(val + ret, size, "preferred_osd=%lld\n",
 108                            (unsigned long long)ceph_file_layout_pg_preferred(
 109                                    ci->i_layout));
 110        return ret;
 111}
 112
 113static struct ceph_vxattr_cb ceph_file_vxattrs[] = {
 114        { true, "ceph.layout", ceph_vxattrcb_layout},
 115        { NULL, NULL }
 116};
 117
 118static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode)
 119{
 120        if (S_ISDIR(inode->i_mode))
 121                return ceph_dir_vxattrs;
 122        else if (S_ISREG(inode->i_mode))
 123                return ceph_file_vxattrs;
 124        return NULL;
 125}
 126
 127static struct ceph_vxattr_cb *ceph_match_vxattr(struct ceph_vxattr_cb *vxattr,
 128                                                const char *name)
 129{
 130        do {
 131                if (strcmp(vxattr->name, name) == 0)
 132                        return vxattr;
 133                vxattr++;
 134        } while (vxattr->name);
 135        return NULL;
 136}
 137
 138static int __set_xattr(struct ceph_inode_info *ci,
 139                           const char *name, int name_len,
 140                           const char *val, int val_len,
 141                           int dirty,
 142                           int should_free_name, int should_free_val,
 143                           struct ceph_inode_xattr **newxattr)
 144{
 145        struct rb_node **p;
 146        struct rb_node *parent = NULL;
 147        struct ceph_inode_xattr *xattr = NULL;
 148        int c;
 149        int new = 0;
 150
 151        p = &ci->i_xattrs.index.rb_node;
 152        while (*p) {
 153                parent = *p;
 154                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 155                c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
 156                if (c < 0)
 157                        p = &(*p)->rb_left;
 158                else if (c > 0)
 159                        p = &(*p)->rb_right;
 160                else {
 161                        if (name_len == xattr->name_len)
 162                                break;
 163                        else if (name_len < xattr->name_len)
 164                                p = &(*p)->rb_left;
 165                        else
 166                                p = &(*p)->rb_right;
 167                }
 168                xattr = NULL;
 169        }
 170
 171        if (!xattr) {
 172                new = 1;
 173                xattr = *newxattr;
 174                xattr->name = name;
 175                xattr->name_len = name_len;
 176                xattr->should_free_name = should_free_name;
 177
 178                ci->i_xattrs.count++;
 179                dout("__set_xattr count=%d\n", ci->i_xattrs.count);
 180        } else {
 181                kfree(*newxattr);
 182                *newxattr = NULL;
 183                if (xattr->should_free_val)
 184                        kfree((void *)xattr->val);
 185
 186                if (should_free_name) {
 187                        kfree((void *)name);
 188                        name = xattr->name;
 189                }
 190                ci->i_xattrs.names_size -= xattr->name_len;
 191                ci->i_xattrs.vals_size -= xattr->val_len;
 192        }
 193        ci->i_xattrs.names_size += name_len;
 194        ci->i_xattrs.vals_size += val_len;
 195        if (val)
 196                xattr->val = val;
 197        else
 198                xattr->val = "";
 199
 200        xattr->val_len = val_len;
 201        xattr->dirty = dirty;
 202        xattr->should_free_val = (val && should_free_val);
 203
 204        if (new) {
 205                rb_link_node(&xattr->node, parent, p);
 206                rb_insert_color(&xattr->node, &ci->i_xattrs.index);
 207                dout("__set_xattr_val p=%p\n", p);
 208        }
 209
 210        dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
 211             ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
 212
 213        return 0;
 214}
 215
 216static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
 217                           const char *name)
 218{
 219        struct rb_node **p;
 220        struct rb_node *parent = NULL;
 221        struct ceph_inode_xattr *xattr = NULL;
 222        int name_len = strlen(name);
 223        int c;
 224
 225        p = &ci->i_xattrs.index.rb_node;
 226        while (*p) {
 227                parent = *p;
 228                xattr = rb_entry(parent, struct ceph_inode_xattr, node);
 229                c = strncmp(name, xattr->name, xattr->name_len);
 230                if (c == 0 && name_len > xattr->name_len)
 231                        c = 1;
 232                if (c < 0)
 233                        p = &(*p)->rb_left;
 234                else if (c > 0)
 235                        p = &(*p)->rb_right;
 236                else {
 237                        dout("__get_xattr %s: found %.*s\n", name,
 238                             xattr->val_len, xattr->val);
 239                        return xattr;
 240                }
 241        }
 242
 243        dout("__get_xattr %s: not found\n", name);
 244
 245        return NULL;
 246}
 247
 248static void __free_xattr(struct ceph_inode_xattr *xattr)
 249{
 250        BUG_ON(!xattr);
 251
 252        if (xattr->should_free_name)
 253                kfree((void *)xattr->name);
 254        if (xattr->should_free_val)
 255                kfree((void *)xattr->val);
 256
 257        kfree(xattr);
 258}
 259
 260static int __remove_xattr(struct ceph_inode_info *ci,
 261                          struct ceph_inode_xattr *xattr)
 262{
 263        if (!xattr)
 264                return -EOPNOTSUPP;
 265
 266        rb_erase(&xattr->node, &ci->i_xattrs.index);
 267
 268        if (xattr->should_free_name)
 269                kfree((void *)xattr->name);
 270        if (xattr->should_free_val)
 271                kfree((void *)xattr->val);
 272
 273        ci->i_xattrs.names_size -= xattr->name_len;
 274        ci->i_xattrs.vals_size -= xattr->val_len;
 275        ci->i_xattrs.count--;
 276        kfree(xattr);
 277
 278        return 0;
 279}
 280
 281static int __remove_xattr_by_name(struct ceph_inode_info *ci,
 282                           const char *name)
 283{
 284        struct rb_node **p;
 285        struct ceph_inode_xattr *xattr;
 286        int err;
 287
 288        p = &ci->i_xattrs.index.rb_node;
 289        xattr = __get_xattr(ci, name);
 290        err = __remove_xattr(ci, xattr);
 291        return err;
 292}
 293
 294static char *__copy_xattr_names(struct ceph_inode_info *ci,
 295                                char *dest)
 296{
 297        struct rb_node *p;
 298        struct ceph_inode_xattr *xattr = NULL;
 299
 300        p = rb_first(&ci->i_xattrs.index);
 301        dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
 302
 303        while (p) {
 304                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 305                memcpy(dest, xattr->name, xattr->name_len);
 306                dest[xattr->name_len] = '\0';
 307
 308                dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
 309                     xattr->name_len, ci->i_xattrs.names_size);
 310
 311                dest += xattr->name_len + 1;
 312                p = rb_next(p);
 313        }
 314
 315        return dest;
 316}
 317
 318void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 319{
 320        struct rb_node *p, *tmp;
 321        struct ceph_inode_xattr *xattr = NULL;
 322
 323        p = rb_first(&ci->i_xattrs.index);
 324
 325        dout("__ceph_destroy_xattrs p=%p\n", p);
 326
 327        while (p) {
 328                xattr = rb_entry(p, struct ceph_inode_xattr, node);
 329                tmp = p;
 330                p = rb_next(tmp);
 331                dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
 332                     xattr->name_len, xattr->name);
 333                rb_erase(tmp, &ci->i_xattrs.index);
 334
 335                __free_xattr(xattr);
 336        }
 337
 338        ci->i_xattrs.names_size = 0;
 339        ci->i_xattrs.vals_size = 0;
 340        ci->i_xattrs.index_version = 0;
 341        ci->i_xattrs.count = 0;
 342        ci->i_xattrs.index = RB_ROOT;
 343}
 344
 345static int __build_xattrs(struct inode *inode)
 346        __releases(inode->i_lock)
 347        __acquires(inode->i_lock)
 348{
 349        u32 namelen;
 350        u32 numattr = 0;
 351        void *p, *end;
 352        u32 len;
 353        const char *name, *val;
 354        struct ceph_inode_info *ci = ceph_inode(inode);
 355        int xattr_version;
 356        struct ceph_inode_xattr **xattrs = NULL;
 357        int err = 0;
 358        int i;
 359
 360        dout("__build_xattrs() len=%d\n",
 361             ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
 362
 363        if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
 364                return 0; /* already built */
 365
 366        __ceph_destroy_xattrs(ci);
 367
 368start:
 369        /* updated internal xattr rb tree */
 370        if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
 371                p = ci->i_xattrs.blob->vec.iov_base;
 372                end = p + ci->i_xattrs.blob->vec.iov_len;
 373                ceph_decode_32_safe(&p, end, numattr, bad);
 374                xattr_version = ci->i_xattrs.version;
 375                spin_unlock(&inode->i_lock);
 376
 377                xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
 378                                 GFP_NOFS);
 379                err = -ENOMEM;
 380                if (!xattrs)
 381                        goto bad_lock;
 382                memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
 383                for (i = 0; i < numattr; i++) {
 384                        xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
 385                                            GFP_NOFS);
 386                        if (!xattrs[i])
 387                                goto bad_lock;
 388                }
 389
 390                spin_lock(&inode->i_lock);
 391                if (ci->i_xattrs.version != xattr_version) {
 392                        /* lost a race, retry */
 393                        for (i = 0; i < numattr; i++)
 394                                kfree(xattrs[i]);
 395                        kfree(xattrs);
 396                        goto start;
 397                }
 398                err = -EIO;
 399                while (numattr--) {
 400                        ceph_decode_32_safe(&p, end, len, bad);
 401                        namelen = len;
 402                        name = p;
 403                        p += len;
 404                        ceph_decode_32_safe(&p, end, len, bad);
 405                        val = p;
 406                        p += len;
 407
 408                        err = __set_xattr(ci, name, namelen, val, len,
 409                                          0, 0, 0, &xattrs[numattr]);
 410
 411                        if (err < 0)
 412                                goto bad;
 413                }
 414                kfree(xattrs);
 415        }
 416        ci->i_xattrs.index_version = ci->i_xattrs.version;
 417        ci->i_xattrs.dirty = false;
 418
 419        return err;
 420bad_lock:
 421        spin_lock(&inode->i_lock);
 422bad:
 423        if (xattrs) {
 424                for (i = 0; i < numattr; i++)
 425                        kfree(xattrs[i]);
 426                kfree(xattrs);
 427        }
 428        ci->i_xattrs.names_size = 0;
 429        return err;
 430}
 431
 432static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
 433                                    int val_size)
 434{
 435        /*
 436         * 4 bytes for the length, and additional 4 bytes per each xattr name,
 437         * 4 bytes per each value
 438         */
 439        int size = 4 + ci->i_xattrs.count*(4 + 4) +
 440                             ci->i_xattrs.names_size +
 441                             ci->i_xattrs.vals_size;
 442        dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
 443             ci->i_xattrs.count, ci->i_xattrs.names_size,
 444             ci->i_xattrs.vals_size);
 445
 446        if (name_size)
 447                size += 4 + 4 + name_size + val_size;
 448
 449        return size;
 450}
 451
 452/*
 453 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
 454 * and swap into place.
 455 */
 456void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
 457{
 458        struct rb_node *p;
 459        struct ceph_inode_xattr *xattr = NULL;
 460        void *dest;
 461
 462        dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
 463        if (ci->i_xattrs.dirty) {
 464                int need = __get_required_blob_size(ci, 0, 0);
 465
 466                BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
 467
 468                p = rb_first(&ci->i_xattrs.index);
 469                dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
 470
 471                ceph_encode_32(&dest, ci->i_xattrs.count);
 472                while (p) {
 473                        xattr = rb_entry(p, struct ceph_inode_xattr, node);
 474
 475                        ceph_encode_32(&dest, xattr->name_len);
 476                        memcpy(dest, xattr->name, xattr->name_len);
 477                        dest += xattr->name_len;
 478                        ceph_encode_32(&dest, xattr->val_len);
 479                        memcpy(dest, xattr->val, xattr->val_len);
 480                        dest += xattr->val_len;
 481
 482                        p = rb_next(p);
 483                }
 484
 485                /* adjust buffer len; it may be larger than we need */
 486                ci->i_xattrs.prealloc_blob->vec.iov_len =
 487                        dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
 488
 489                if (ci->i_xattrs.blob)
 490                        ceph_buffer_put(ci->i_xattrs.blob);
 491                ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
 492                ci->i_xattrs.prealloc_blob = NULL;
 493                ci->i_xattrs.dirty = false;
 494                ci->i_xattrs.version++;
 495        }
 496}
 497
 498ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
 499                      size_t size)
 500{
 501        struct inode *inode = dentry->d_inode;
 502        struct ceph_inode_info *ci = ceph_inode(inode);
 503        struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
 504        int err;
 505        struct ceph_inode_xattr *xattr;
 506        struct ceph_vxattr_cb *vxattr = NULL;
 507
 508        if (!ceph_is_valid_xattr(name))
 509                return -ENODATA;
 510
 511        /* let's see if a virtual xattr was requested */
 512        if (vxattrs)
 513                vxattr = ceph_match_vxattr(vxattrs, name);
 514
 515        spin_lock(&inode->i_lock);
 516        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
 517             ci->i_xattrs.version, ci->i_xattrs.index_version);
 518
 519        if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
 520            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
 521                goto get_xattr;
 522        } else {
 523                spin_unlock(&inode->i_lock);
 524                /* get xattrs from mds (if we don't already have them) */
 525                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
 526                if (err)
 527                        return err;
 528        }
 529
 530        spin_lock(&inode->i_lock);
 531
 532        if (vxattr && vxattr->readonly) {
 533                err = vxattr->getxattr_cb(ci, value, size);
 534                goto out;
 535        }
 536
 537        err = __build_xattrs(inode);
 538        if (err < 0)
 539                goto out;
 540
 541get_xattr:
 542        err = -ENODATA;  /* == ENOATTR */
 543        xattr = __get_xattr(ci, name);
 544        if (!xattr) {
 545                if (vxattr)
 546                        err = vxattr->getxattr_cb(ci, value, size);
 547                goto out;
 548        }
 549
 550        err = -ERANGE;
 551        if (size && size < xattr->val_len)
 552                goto out;
 553
 554        err = xattr->val_len;
 555        if (size == 0)
 556                goto out;
 557
 558        memcpy(value, xattr->val, xattr->val_len);
 559
 560out:
 561        spin_unlock(&inode->i_lock);
 562        return err;
 563}
 564
 565ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 566{
 567        struct inode *inode = dentry->d_inode;
 568        struct ceph_inode_info *ci = ceph_inode(inode);
 569        struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
 570        u32 vir_namelen = 0;
 571        u32 namelen;
 572        int err;
 573        u32 len;
 574        int i;
 575
 576        spin_lock(&inode->i_lock);
 577        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
 578             ci->i_xattrs.version, ci->i_xattrs.index_version);
 579
 580        if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
 581            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
 582                goto list_xattr;
 583        } else {
 584                spin_unlock(&inode->i_lock);
 585                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
 586                if (err)
 587                        return err;
 588        }
 589
 590        spin_lock(&inode->i_lock);
 591
 592        err = __build_xattrs(inode);
 593        if (err < 0)
 594                goto out;
 595
 596list_xattr:
 597        vir_namelen = 0;
 598        /* include virtual dir xattrs */
 599        if (vxattrs)
 600                for (i = 0; vxattrs[i].name; i++)
 601                        vir_namelen += strlen(vxattrs[i].name) + 1;
 602        /* adding 1 byte per each variable due to the null termination */
 603        namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count;
 604        err = -ERANGE;
 605        if (size && namelen > size)
 606                goto out;
 607
 608        err = namelen;
 609        if (size == 0)
 610                goto out;
 611
 612        names = __copy_xattr_names(ci, names);
 613
 614        /* virtual xattr names, too */
 615        if (vxattrs)
 616                for (i = 0; vxattrs[i].name; i++) {
 617                        len = sprintf(names, "%s", vxattrs[i].name);
 618                        names += len + 1;
 619                }
 620
 621out:
 622        spin_unlock(&inode->i_lock);
 623        return err;
 624}
 625
 626static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 627                              const char *value, size_t size, int flags)
 628{
 629        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 630        struct inode *inode = dentry->d_inode;
 631        struct ceph_inode_info *ci = ceph_inode(inode);
 632        struct inode *parent_inode;
 633        struct ceph_mds_request *req;
 634        struct ceph_mds_client *mdsc = fsc->mdsc;
 635        int err;
 636        int i, nr_pages;
 637        struct page **pages = NULL;
 638        void *kaddr;
 639
 640        /* copy value into some pages */
 641        nr_pages = calc_pages_for(0, size);
 642        if (nr_pages) {
 643                pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
 644                if (!pages)
 645                        return -ENOMEM;
 646                err = -ENOMEM;
 647                for (i = 0; i < nr_pages; i++) {
 648                        pages[i] = __page_cache_alloc(GFP_NOFS);
 649                        if (!pages[i]) {
 650                                nr_pages = i;
 651                                goto out;
 652                        }
 653                        kaddr = kmap(pages[i]);
 654                        memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
 655                               min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
 656                }
 657        }
 658
 659        dout("setxattr value=%.*s\n", (int)size, value);
 660
 661        /* do request */
 662        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
 663                                       USE_AUTH_MDS);
 664        if (IS_ERR(req)) {
 665                err = PTR_ERR(req);
 666                goto out;
 667        }
 668        req->r_inode = inode;
 669        ihold(inode);
 670        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 671        req->r_num_caps = 1;
 672        req->r_args.setxattr.flags = cpu_to_le32(flags);
 673        req->r_path2 = kstrdup(name, GFP_NOFS);
 674
 675        req->r_pages = pages;
 676        req->r_num_pages = nr_pages;
 677        req->r_data_len = size;
 678
 679        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
 680        parent_inode = ceph_get_dentry_parent_inode(dentry);
 681        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
 682        iput(parent_inode);
 683        ceph_mdsc_put_request(req);
 684        dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
 685
 686out:
 687        if (pages) {
 688                for (i = 0; i < nr_pages; i++)
 689                        __free_page(pages[i]);
 690                kfree(pages);
 691        }
 692        return err;
 693}
 694
 695int ceph_setxattr(struct dentry *dentry, const char *name,
 696                  const void *value, size_t size, int flags)
 697{
 698        struct inode *inode = dentry->d_inode;
 699        struct ceph_inode_info *ci = ceph_inode(inode);
 700        struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
 701        int err;
 702        int name_len = strlen(name);
 703        int val_len = size;
 704        char *newname = NULL;
 705        char *newval = NULL;
 706        struct ceph_inode_xattr *xattr = NULL;
 707        int issued;
 708        int required_blob_size;
 709        int dirty;
 710
 711        if (ceph_snap(inode) != CEPH_NOSNAP)
 712                return -EROFS;
 713
 714        if (!ceph_is_valid_xattr(name))
 715                return -EOPNOTSUPP;
 716
 717        if (vxattrs) {
 718                struct ceph_vxattr_cb *vxattr =
 719                        ceph_match_vxattr(vxattrs, name);
 720                if (vxattr && vxattr->readonly)
 721                        return -EOPNOTSUPP;
 722        }
 723
 724        /* preallocate memory for xattr name, value, index node */
 725        err = -ENOMEM;
 726        newname = kmemdup(name, name_len + 1, GFP_NOFS);
 727        if (!newname)
 728                goto out;
 729
 730        if (val_len) {
 731                newval = kmalloc(val_len + 1, GFP_NOFS);
 732                if (!newval)
 733                        goto out;
 734                memcpy(newval, value, val_len);
 735                newval[val_len] = '\0';
 736        }
 737
 738        xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
 739        if (!xattr)
 740                goto out;
 741
 742        spin_lock(&inode->i_lock);
 743retry:
 744        issued = __ceph_caps_issued(ci, NULL);
 745        if (!(issued & CEPH_CAP_XATTR_EXCL))
 746                goto do_sync;
 747        __build_xattrs(inode);
 748
 749        required_blob_size = __get_required_blob_size(ci, name_len, val_len);
 750
 751        if (!ci->i_xattrs.prealloc_blob ||
 752            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
 753                struct ceph_buffer *blob = NULL;
 754
 755                spin_unlock(&inode->i_lock);
 756                dout(" preaallocating new blob size=%d\n", required_blob_size);
 757                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
 758                if (!blob)
 759                        goto out;
 760                spin_lock(&inode->i_lock);
 761                if (ci->i_xattrs.prealloc_blob)
 762                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
 763                ci->i_xattrs.prealloc_blob = blob;
 764                goto retry;
 765        }
 766
 767        dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
 768        err = __set_xattr(ci, newname, name_len, newval,
 769                          val_len, 1, 1, 1, &xattr);
 770        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
 771        ci->i_xattrs.dirty = true;
 772        inode->i_ctime = CURRENT_TIME;
 773        spin_unlock(&inode->i_lock);
 774        if (dirty)
 775                __mark_inode_dirty(inode, dirty);
 776        return err;
 777
 778do_sync:
 779        spin_unlock(&inode->i_lock);
 780        err = ceph_sync_setxattr(dentry, name, value, size, flags);
 781out:
 782        kfree(newname);
 783        kfree(newval);
 784        kfree(xattr);
 785        return err;
 786}
 787
 788static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 789{
 790        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 791        struct ceph_mds_client *mdsc = fsc->mdsc;
 792        struct inode *inode = dentry->d_inode;
 793        struct inode *parent_inode;
 794        struct ceph_mds_request *req;
 795        int err;
 796
 797        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
 798                                       USE_AUTH_MDS);
 799        if (IS_ERR(req))
 800                return PTR_ERR(req);
 801        req->r_inode = inode;
 802        ihold(inode);
 803        req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 804        req->r_num_caps = 1;
 805        req->r_path2 = kstrdup(name, GFP_NOFS);
 806
 807        parent_inode = ceph_get_dentry_parent_inode(dentry);
 808        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
 809        iput(parent_inode);
 810        ceph_mdsc_put_request(req);
 811        return err;
 812}
 813
 814int ceph_removexattr(struct dentry *dentry, const char *name)
 815{
 816        struct inode *inode = dentry->d_inode;
 817        struct ceph_inode_info *ci = ceph_inode(inode);
 818        struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
 819        int issued;
 820        int err;
 821        int dirty;
 822
 823        if (ceph_snap(inode) != CEPH_NOSNAP)
 824                return -EROFS;
 825
 826        if (!ceph_is_valid_xattr(name))
 827                return -EOPNOTSUPP;
 828
 829        if (vxattrs) {
 830                struct ceph_vxattr_cb *vxattr =
 831                        ceph_match_vxattr(vxattrs, name);
 832                if (vxattr && vxattr->readonly)
 833                        return -EOPNOTSUPP;
 834        }
 835
 836        spin_lock(&inode->i_lock);
 837        __build_xattrs(inode);
 838        issued = __ceph_caps_issued(ci, NULL);
 839        dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
 840
 841        if (!(issued & CEPH_CAP_XATTR_EXCL))
 842                goto do_sync;
 843
 844        err = __remove_xattr_by_name(ceph_inode(inode), name);
 845        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
 846        ci->i_xattrs.dirty = true;
 847        inode->i_ctime = CURRENT_TIME;
 848
 849        spin_unlock(&inode->i_lock);
 850        if (dirty)
 851                __mark_inode_dirty(inode, dirty);
 852        return err;
 853do_sync:
 854        spin_unlock(&inode->i_lock);
 855        err = ceph_send_removexattr(dentry, name);
 856        return err;
 857}
 858
 859