1#include <linux/ceph/ceph_debug.h>
2#include <linux/ceph/pagelist.h>
3
4#include "super.h"
5#include "mds_client.h"
6
7#include <linux/ceph/decode.h>
8
9#include <linux/xattr.h>
10#include <linux/slab.h>
11#include <linux/ratelimit.h>
12
/* Prefix shared by every ceph.* xattr name handled in this file. */
#define XATTR_CEPH_PREFIX "ceph."
/* Length of the prefix without its terminating NUL. */
#define XATTR_CEPH_PREFIX_LEN (sizeof(XATTR_CEPH_PREFIX) - 1)

/* Forward declaration: __set_xattr() calls this before it is defined. */
static int __remove_xattr(struct ceph_inode_info *ci,
			  struct ceph_inode_xattr *xattr);
18
19
20
21
/*
 * NULL-terminated table of xattr handlers.  It holds the two POSIX ACL
 * handlers when CONFIG_CEPH_FS_POSIX_ACL is enabled and only the
 * terminator otherwise.
 */
const struct xattr_handler *ceph_xattr_handlers[] = {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	&ceph_xattr_acl_access_handler,
	&ceph_xattr_acl_default_handler,
#endif
	NULL,
};
29
30static bool ceph_is_valid_xattr(const char *name)
31{
32 return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
33 !strncmp(name, XATTR_SECURITY_PREFIX,
34 XATTR_SECURITY_PREFIX_LEN) ||
35 !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
36 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
37 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
38}
39
40
41
42
43
/*
 * Description of one virtual xattr: a value that is generated from
 * in-memory inode state by a callback rather than read from the
 * per-inode xattr index.
 */
struct ceph_vxattr {
	/* full attribute name; NULL marks the end of a table */
	char *name;
	/* sizeof() of the name literal, i.e. including the trailing NUL */
	size_t name_size;
	/* formats the value into @val, which holds @size bytes */
	size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
			      size_t size);
	/* set/remove requests on a readonly vxattr fail with -EOPNOTSUPP */
	bool readonly;
	/* hidden vxattrs are left out of listxattr output */
	bool hidden;
	/* optional; when set and returning false, getxattr gives -ENODATA */
	bool (*exists_cb)(struct ceph_inode_info *ci);
};
52
53
54
55static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
56{
57 struct ceph_file_layout *fl = &ci->i_layout;
58 return (fl->stripe_unit > 0 || fl->stripe_count > 0 ||
59 fl->object_size > 0 || fl->pool_id >= 0 ||
60 rcu_dereference_raw(fl->pool_ns) != NULL);
61}
62
63static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
64 size_t size)
65{
66 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
67 struct ceph_osd_client *osdc = &fsc->client->osdc;
68 struct ceph_string *pool_ns;
69 s64 pool = ci->i_layout.pool_id;
70 const char *pool_name;
71 const char *ns_field = " pool_namespace=";
72 char buf[128];
73 size_t len, total_len = 0;
74 int ret;
75
76 pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
77
78 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
79 down_read(&osdc->lock);
80 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
81 if (pool_name) {
82 len = snprintf(buf, sizeof(buf),
83 "stripe_unit=%u stripe_count=%u object_size=%u pool=",
84 ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
85 ci->i_layout.object_size);
86 total_len = len + strlen(pool_name);
87 } else {
88 len = snprintf(buf, sizeof(buf),
89 "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld",
90 ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
91 ci->i_layout.object_size, (unsigned long long)pool);
92 total_len = len;
93 }
94
95 if (pool_ns)
96 total_len += strlen(ns_field) + pool_ns->len;
97
98 if (!size) {
99 ret = total_len;
100 } else if (total_len > size) {
101 ret = -ERANGE;
102 } else {
103 memcpy(val, buf, len);
104 ret = len;
105 if (pool_name) {
106 len = strlen(pool_name);
107 memcpy(val + ret, pool_name, len);
108 ret += len;
109 }
110 if (pool_ns) {
111 len = strlen(ns_field);
112 memcpy(val + ret, ns_field, len);
113 ret += len;
114 memcpy(val + ret, pool_ns->str, pool_ns->len);
115 ret += pool_ns->len;
116 }
117 }
118 up_read(&osdc->lock);
119 ceph_put_string(pool_ns);
120 return ret;
121}
122
123static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
124 char *val, size_t size)
125{
126 return snprintf(val, size, "%u", ci->i_layout.stripe_unit);
127}
128
129static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
130 char *val, size_t size)
131{
132 return snprintf(val, size, "%u", ci->i_layout.stripe_count);
133}
134
135static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
136 char *val, size_t size)
137{
138 return snprintf(val, size, "%u", ci->i_layout.object_size);
139}
140
141static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
142 char *val, size_t size)
143{
144 int ret;
145 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
146 struct ceph_osd_client *osdc = &fsc->client->osdc;
147 s64 pool = ci->i_layout.pool_id;
148 const char *pool_name;
149
150 down_read(&osdc->lock);
151 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
152 if (pool_name)
153 ret = snprintf(val, size, "%s", pool_name);
154 else
155 ret = snprintf(val, size, "%lld", (unsigned long long)pool);
156 up_read(&osdc->lock);
157 return ret;
158}
159
160static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci,
161 char *val, size_t size)
162{
163 int ret = 0;
164 struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns);
165 if (ns) {
166 ret = snprintf(val, size, "%.*s", (int)ns->len, ns->str);
167 ceph_put_string(ns);
168 }
169 return ret;
170}
171
172
173
174static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
175 size_t size)
176{
177 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
178}
179
180static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
181 size_t size)
182{
183 return snprintf(val, size, "%lld", ci->i_files);
184}
185
186static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
187 size_t size)
188{
189 return snprintf(val, size, "%lld", ci->i_subdirs);
190}
191
192static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
193 size_t size)
194{
195 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
196}
197
198static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
199 size_t size)
200{
201 return snprintf(val, size, "%lld", ci->i_rfiles);
202}
203
204static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
205 size_t size)
206{
207 return snprintf(val, size, "%lld", ci->i_rsubdirs);
208}
209
210static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
211 size_t size)
212{
213 return snprintf(val, size, "%lld", ci->i_rbytes);
214}
215
216static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
217 size_t size)
218{
219 return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
220 (long)ci->i_rctime.tv_nsec);
221}
222
223
224
225static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
226{
227 bool ret = false;
228 spin_lock(&ci->i_ceph_lock);
229 if ((ci->i_max_files || ci->i_max_bytes) &&
230 ci->i_vino.snap == CEPH_NOSNAP &&
231 ci->i_snap_realm &&
232 ci->i_snap_realm->ino == ci->i_vino.ino)
233 ret = true;
234 spin_unlock(&ci->i_ceph_lock);
235 return ret;
236}
237
238static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
239 size_t size)
240{
241 return snprintf(val, size, "max_bytes=%llu max_files=%llu",
242 ci->i_max_bytes, ci->i_max_files);
243}
244
245static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
246 char *val, size_t size)
247{
248 return snprintf(val, size, "%llu", ci->i_max_bytes);
249}
250
251static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
252 char *val, size_t size)
253{
254 return snprintf(val, size, "%llu", ci->i_max_files);
255}
256
/* Build the literals "ceph.<type>.<name>" and "ceph.<type>.<name>.<name2>". */
#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2)	\
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

/*
 * Table entry for a read-only, listed vxattr named "ceph.<type>.<name>"
 * whose value comes from ceph_vxattrcb_<type>_<name>().
 */
#define XATTR_NAME_CEPH(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.readonly = true,					\
		.hidden = false,					\
		.exists_cb = NULL,					\
	}

/*
 * Table entry for a writable, hidden layout field named
 * "ceph.<type>.<name>.<field>" whose value comes from
 * ceph_vxattrcb_<name>_<field>(); it exists only while
 * ceph_vxattrcb_layout_exists() reports a layout.
 */
#define XATTR_LAYOUT_FIELD(_type, _name, _field)			\
	{								\
		.name = CEPH_XATTR_NAME2(_type, _name, _field),		\
		.name_size = sizeof(CEPH_XATTR_NAME2(_type, _name, _field)), \
		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field,	\
		.readonly = false,					\
		.hidden = true,						\
		.exists_cb = ceph_vxattrcb_layout_exists,		\
	}

/*
 * Table entry for a writable, hidden quota field named
 * "ceph.<type>.<name>" whose value comes from
 * ceph_vxattrcb_<type>_<name>(); it exists only while
 * ceph_vxattrcb_quota_exists() reports a quota.
 */
#define XATTR_QUOTA_FIELD(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.readonly = false,					\
		.hidden = true,						\
		.exists_cb = ceph_vxattrcb_quota_exists,		\
	}
288
289static struct ceph_vxattr ceph_dir_vxattrs[] = {
290 {
291 .name = "ceph.dir.layout",
292 .name_size = sizeof("ceph.dir.layout"),
293 .getxattr_cb = ceph_vxattrcb_layout,
294 .readonly = false,
295 .hidden = true,
296 .exists_cb = ceph_vxattrcb_layout_exists,
297 },
298 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
299 XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
300 XATTR_LAYOUT_FIELD(dir, layout, object_size),
301 XATTR_LAYOUT_FIELD(dir, layout, pool),
302 XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
303 XATTR_NAME_CEPH(dir, entries),
304 XATTR_NAME_CEPH(dir, files),
305 XATTR_NAME_CEPH(dir, subdirs),
306 XATTR_NAME_CEPH(dir, rentries),
307 XATTR_NAME_CEPH(dir, rfiles),
308 XATTR_NAME_CEPH(dir, rsubdirs),
309 XATTR_NAME_CEPH(dir, rbytes),
310 XATTR_NAME_CEPH(dir, rctime),
311 {
312 .name = "ceph.quota",
313 .name_size = sizeof("ceph.quota"),
314 .getxattr_cb = ceph_vxattrcb_quota,
315 .readonly = false,
316 .hidden = true,
317 .exists_cb = ceph_vxattrcb_quota_exists,
318 },
319 XATTR_QUOTA_FIELD(quota, max_bytes),
320 XATTR_QUOTA_FIELD(quota, max_files),
321 { .name = NULL, 0 }
322};
323static size_t ceph_dir_vxattrs_name_size;
324
325
326
327static struct ceph_vxattr ceph_file_vxattrs[] = {
328 {
329 .name = "ceph.file.layout",
330 .name_size = sizeof("ceph.file.layout"),
331 .getxattr_cb = ceph_vxattrcb_layout,
332 .readonly = false,
333 .hidden = true,
334 .exists_cb = ceph_vxattrcb_layout_exists,
335 },
336 XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
337 XATTR_LAYOUT_FIELD(file, layout, stripe_count),
338 XATTR_LAYOUT_FIELD(file, layout, object_size),
339 XATTR_LAYOUT_FIELD(file, layout, pool),
340 XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
341 { .name = NULL, 0 }
342};
343static size_t ceph_file_vxattrs_name_size;
344
345static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
346{
347 if (S_ISDIR(inode->i_mode))
348 return ceph_dir_vxattrs;
349 else if (S_ISREG(inode->i_mode))
350 return ceph_file_vxattrs;
351 return NULL;
352}
353
354static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
355{
356 if (vxattrs == ceph_dir_vxattrs)
357 return ceph_dir_vxattrs_name_size;
358 if (vxattrs == ceph_file_vxattrs)
359 return ceph_file_vxattrs_name_size;
360 BUG_ON(vxattrs);
361 return 0;
362}
363
364
365
366
367
368static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
369{
370 struct ceph_vxattr *vxattr;
371 size_t size = 0;
372
373 for (vxattr = vxattrs; vxattr->name; vxattr++)
374 if (!vxattr->hidden)
375 size += vxattr->name_size;
376
377 return size;
378}
379
380
381
382void __init ceph_xattr_init(void)
383{
384 ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
385 ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
386}
387
388void ceph_xattr_exit(void)
389{
390 ceph_dir_vxattrs_name_size = 0;
391 ceph_file_vxattrs_name_size = 0;
392}
393
394static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
395 const char *name)
396{
397 struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
398
399 if (vxattr) {
400 while (vxattr->name) {
401 if (!strcmp(vxattr->name, name))
402 return vxattr;
403 vxattr++;
404 }
405 }
406
407 return NULL;
408}
409
410static int __set_xattr(struct ceph_inode_info *ci,
411 const char *name, int name_len,
412 const char *val, int val_len,
413 int flags, int update_xattr,
414 struct ceph_inode_xattr **newxattr)
415{
416 struct rb_node **p;
417 struct rb_node *parent = NULL;
418 struct ceph_inode_xattr *xattr = NULL;
419 int c;
420 int new = 0;
421
422 p = &ci->i_xattrs.index.rb_node;
423 while (*p) {
424 parent = *p;
425 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
426 c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
427 if (c < 0)
428 p = &(*p)->rb_left;
429 else if (c > 0)
430 p = &(*p)->rb_right;
431 else {
432 if (name_len == xattr->name_len)
433 break;
434 else if (name_len < xattr->name_len)
435 p = &(*p)->rb_left;
436 else
437 p = &(*p)->rb_right;
438 }
439 xattr = NULL;
440 }
441
442 if (update_xattr) {
443 int err = 0;
444
445 if (xattr && (flags & XATTR_CREATE))
446 err = -EEXIST;
447 else if (!xattr && (flags & XATTR_REPLACE))
448 err = -ENODATA;
449 if (err) {
450 kfree(name);
451 kfree(val);
452 kfree(*newxattr);
453 return err;
454 }
455 if (update_xattr < 0) {
456 if (xattr)
457 __remove_xattr(ci, xattr);
458 kfree(name);
459 kfree(*newxattr);
460 return 0;
461 }
462 }
463
464 if (!xattr) {
465 new = 1;
466 xattr = *newxattr;
467 xattr->name = name;
468 xattr->name_len = name_len;
469 xattr->should_free_name = update_xattr;
470
471 ci->i_xattrs.count++;
472 dout("__set_xattr count=%d\n", ci->i_xattrs.count);
473 } else {
474 kfree(*newxattr);
475 *newxattr = NULL;
476 if (xattr->should_free_val)
477 kfree((void *)xattr->val);
478
479 if (update_xattr) {
480 kfree((void *)name);
481 name = xattr->name;
482 }
483 ci->i_xattrs.names_size -= xattr->name_len;
484 ci->i_xattrs.vals_size -= xattr->val_len;
485 }
486 ci->i_xattrs.names_size += name_len;
487 ci->i_xattrs.vals_size += val_len;
488 if (val)
489 xattr->val = val;
490 else
491 xattr->val = "";
492
493 xattr->val_len = val_len;
494 xattr->dirty = update_xattr;
495 xattr->should_free_val = (val && update_xattr);
496
497 if (new) {
498 rb_link_node(&xattr->node, parent, p);
499 rb_insert_color(&xattr->node, &ci->i_xattrs.index);
500 dout("__set_xattr_val p=%p\n", p);
501 }
502
503 dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
504 ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
505
506 return 0;
507}
508
509static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
510 const char *name)
511{
512 struct rb_node **p;
513 struct rb_node *parent = NULL;
514 struct ceph_inode_xattr *xattr = NULL;
515 int name_len = strlen(name);
516 int c;
517
518 p = &ci->i_xattrs.index.rb_node;
519 while (*p) {
520 parent = *p;
521 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
522 c = strncmp(name, xattr->name, xattr->name_len);
523 if (c == 0 && name_len > xattr->name_len)
524 c = 1;
525 if (c < 0)
526 p = &(*p)->rb_left;
527 else if (c > 0)
528 p = &(*p)->rb_right;
529 else {
530 dout("__get_xattr %s: found %.*s\n", name,
531 xattr->val_len, xattr->val);
532 return xattr;
533 }
534 }
535
536 dout("__get_xattr %s: not found\n", name);
537
538 return NULL;
539}
540
541static void __free_xattr(struct ceph_inode_xattr *xattr)
542{
543 BUG_ON(!xattr);
544
545 if (xattr->should_free_name)
546 kfree((void *)xattr->name);
547 if (xattr->should_free_val)
548 kfree((void *)xattr->val);
549
550 kfree(xattr);
551}
552
553static int __remove_xattr(struct ceph_inode_info *ci,
554 struct ceph_inode_xattr *xattr)
555{
556 if (!xattr)
557 return -ENODATA;
558
559 rb_erase(&xattr->node, &ci->i_xattrs.index);
560
561 if (xattr->should_free_name)
562 kfree((void *)xattr->name);
563 if (xattr->should_free_val)
564 kfree((void *)xattr->val);
565
566 ci->i_xattrs.names_size -= xattr->name_len;
567 ci->i_xattrs.vals_size -= xattr->val_len;
568 ci->i_xattrs.count--;
569 kfree(xattr);
570
571 return 0;
572}
573
/*
 * Remove the index entry called @name.  Returns 0 on success or
 * -ENODATA when no such entry exists (a failed lookup yields NULL,
 * which __remove_xattr() reports as -ENODATA).
 */
static int __remove_xattr_by_name(struct ceph_inode_info *ci,
				  const char *name)
{
	struct ceph_inode_xattr *xattr = __get_xattr(ci, name);

	return __remove_xattr(ci, xattr);
}
586
587static char *__copy_xattr_names(struct ceph_inode_info *ci,
588 char *dest)
589{
590 struct rb_node *p;
591 struct ceph_inode_xattr *xattr = NULL;
592
593 p = rb_first(&ci->i_xattrs.index);
594 dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
595
596 while (p) {
597 xattr = rb_entry(p, struct ceph_inode_xattr, node);
598 memcpy(dest, xattr->name, xattr->name_len);
599 dest[xattr->name_len] = '\0';
600
601 dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
602 xattr->name_len, ci->i_xattrs.names_size);
603
604 dest += xattr->name_len + 1;
605 p = rb_next(p);
606 }
607
608 return dest;
609}
610
611void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
612{
613 struct rb_node *p, *tmp;
614 struct ceph_inode_xattr *xattr = NULL;
615
616 p = rb_first(&ci->i_xattrs.index);
617
618 dout("__ceph_destroy_xattrs p=%p\n", p);
619
620 while (p) {
621 xattr = rb_entry(p, struct ceph_inode_xattr, node);
622 tmp = p;
623 p = rb_next(tmp);
624 dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
625 xattr->name_len, xattr->name);
626 rb_erase(tmp, &ci->i_xattrs.index);
627
628 __free_xattr(xattr);
629 }
630
631 ci->i_xattrs.names_size = 0;
632 ci->i_xattrs.vals_size = 0;
633 ci->i_xattrs.index_version = 0;
634 ci->i_xattrs.count = 0;
635 ci->i_xattrs.index = RB_ROOT;
636}
637
638static int __build_xattrs(struct inode *inode)
639 __releases(ci->i_ceph_lock)
640 __acquires(ci->i_ceph_lock)
641{
642 u32 namelen;
643 u32 numattr = 0;
644 void *p, *end;
645 u32 len;
646 const char *name, *val;
647 struct ceph_inode_info *ci = ceph_inode(inode);
648 int xattr_version;
649 struct ceph_inode_xattr **xattrs = NULL;
650 int err = 0;
651 int i;
652
653 dout("__build_xattrs() len=%d\n",
654 ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
655
656 if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
657 return 0;
658
659 __ceph_destroy_xattrs(ci);
660
661start:
662
663 if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
664 p = ci->i_xattrs.blob->vec.iov_base;
665 end = p + ci->i_xattrs.blob->vec.iov_len;
666 ceph_decode_32_safe(&p, end, numattr, bad);
667 xattr_version = ci->i_xattrs.version;
668 spin_unlock(&ci->i_ceph_lock);
669
670 xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
671 GFP_NOFS);
672 err = -ENOMEM;
673 if (!xattrs)
674 goto bad_lock;
675
676 for (i = 0; i < numattr; i++) {
677 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
678 GFP_NOFS);
679 if (!xattrs[i])
680 goto bad_lock;
681 }
682
683 spin_lock(&ci->i_ceph_lock);
684 if (ci->i_xattrs.version != xattr_version) {
685
686 for (i = 0; i < numattr; i++)
687 kfree(xattrs[i]);
688 kfree(xattrs);
689 xattrs = NULL;
690 goto start;
691 }
692 err = -EIO;
693 while (numattr--) {
694 ceph_decode_32_safe(&p, end, len, bad);
695 namelen = len;
696 name = p;
697 p += len;
698 ceph_decode_32_safe(&p, end, len, bad);
699 val = p;
700 p += len;
701
702 err = __set_xattr(ci, name, namelen, val, len,
703 0, 0, &xattrs[numattr]);
704
705 if (err < 0)
706 goto bad;
707 }
708 kfree(xattrs);
709 }
710 ci->i_xattrs.index_version = ci->i_xattrs.version;
711 ci->i_xattrs.dirty = false;
712
713 return err;
714bad_lock:
715 spin_lock(&ci->i_ceph_lock);
716bad:
717 if (xattrs) {
718 for (i = 0; i < numattr; i++)
719 kfree(xattrs[i]);
720 kfree(xattrs);
721 }
722 ci->i_xattrs.names_size = 0;
723 return err;
724}
725
726static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
727 int val_size)
728{
729
730
731
732
733 int size = 4 + ci->i_xattrs.count*(4 + 4) +
734 ci->i_xattrs.names_size +
735 ci->i_xattrs.vals_size;
736 dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
737 ci->i_xattrs.count, ci->i_xattrs.names_size,
738 ci->i_xattrs.vals_size);
739
740 if (name_size)
741 size += 4 + 4 + name_size + val_size;
742
743 return size;
744}
745
746
747
748
749
750void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
751{
752 struct rb_node *p;
753 struct ceph_inode_xattr *xattr = NULL;
754 void *dest;
755
756 dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
757 if (ci->i_xattrs.dirty) {
758 int need = __get_required_blob_size(ci, 0, 0);
759
760 BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
761
762 p = rb_first(&ci->i_xattrs.index);
763 dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
764
765 ceph_encode_32(&dest, ci->i_xattrs.count);
766 while (p) {
767 xattr = rb_entry(p, struct ceph_inode_xattr, node);
768
769 ceph_encode_32(&dest, xattr->name_len);
770 memcpy(dest, xattr->name, xattr->name_len);
771 dest += xattr->name_len;
772 ceph_encode_32(&dest, xattr->val_len);
773 memcpy(dest, xattr->val, xattr->val_len);
774 dest += xattr->val_len;
775
776 p = rb_next(p);
777 }
778
779
780 ci->i_xattrs.prealloc_blob->vec.iov_len =
781 dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
782
783 if (ci->i_xattrs.blob)
784 ceph_buffer_put(ci->i_xattrs.blob);
785 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
786 ci->i_xattrs.prealloc_blob = NULL;
787 ci->i_xattrs.dirty = false;
788 ci->i_xattrs.version++;
789 }
790}
791
792static inline int __get_request_mask(struct inode *in) {
793 struct ceph_mds_request *req = current->journal_info;
794 int mask = 0;
795 if (req && req->r_target_inode == in) {
796 if (req->r_op == CEPH_MDS_OP_LOOKUP ||
797 req->r_op == CEPH_MDS_OP_LOOKUPINO ||
798 req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
799 req->r_op == CEPH_MDS_OP_GETATTR) {
800 mask = le32_to_cpu(req->r_args.getattr.mask);
801 } else if (req->r_op == CEPH_MDS_OP_OPEN ||
802 req->r_op == CEPH_MDS_OP_CREATE) {
803 mask = le32_to_cpu(req->r_args.open.mask);
804 }
805 }
806 return mask;
807}
808
809ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
810 size_t size)
811{
812 struct ceph_inode_info *ci = ceph_inode(inode);
813 struct ceph_inode_xattr *xattr;
814 struct ceph_vxattr *vxattr = NULL;
815 int req_mask;
816 int err;
817
818 if (!ceph_is_valid_xattr(name))
819 return -ENODATA;
820
821
822 vxattr = ceph_match_vxattr(inode, name);
823 if (vxattr) {
824 err = ceph_do_getattr(inode, 0, true);
825 if (err)
826 return err;
827 err = -ENODATA;
828 if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
829 err = vxattr->getxattr_cb(ci, value, size);
830 return err;
831 }
832
833 req_mask = __get_request_mask(inode);
834
835 spin_lock(&ci->i_ceph_lock);
836 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
837 ci->i_xattrs.version, ci->i_xattrs.index_version);
838
839 if (ci->i_xattrs.version == 0 ||
840 !((req_mask & CEPH_CAP_XATTR_SHARED) ||
841 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
842 spin_unlock(&ci->i_ceph_lock);
843
844
845 if (current->journal_info) {
846 pr_warn_ratelimited("sync getxattr %p "
847 "during filling trace\n", inode);
848 return -EBUSY;
849 }
850
851
852 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
853 if (err)
854 return err;
855 spin_lock(&ci->i_ceph_lock);
856 }
857
858 err = __build_xattrs(inode);
859 if (err < 0)
860 goto out;
861
862 err = -ENODATA;
863 xattr = __get_xattr(ci, name);
864 if (!xattr)
865 goto out;
866
867 err = -ERANGE;
868 if (size && size < xattr->val_len)
869 goto out;
870
871 err = xattr->val_len;
872 if (size == 0)
873 goto out;
874
875 memcpy(value, xattr->val, xattr->val_len);
876
877 if (current->journal_info &&
878 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
879 ci->i_ceph_flags |= CEPH_I_SEC_INITED;
880out:
881 spin_unlock(&ci->i_ceph_lock);
882 return err;
883}
884
885ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
886 size_t size)
887{
888 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
889 return generic_getxattr(dentry, name, value, size);
890
891 return __ceph_getxattr(dentry->d_inode, name, value, size);
892}
893
894ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
895{
896 struct inode *inode = dentry->d_inode;
897 struct ceph_inode_info *ci = ceph_inode(inode);
898 struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
899 u32 vir_namelen = 0;
900 u32 namelen;
901 int err;
902 u32 len;
903 int i;
904
905 spin_lock(&ci->i_ceph_lock);
906 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
907 ci->i_xattrs.version, ci->i_xattrs.index_version);
908
909 if (ci->i_xattrs.version == 0 ||
910 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
911 spin_unlock(&ci->i_ceph_lock);
912 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
913 if (err)
914 return err;
915 spin_lock(&ci->i_ceph_lock);
916 }
917
918 err = __build_xattrs(inode);
919 if (err < 0)
920 goto out;
921
922
923
924
925 vir_namelen = ceph_vxattrs_name_size(vxattrs);
926
927
928 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
929 err = -ERANGE;
930 if (size && vir_namelen + namelen > size)
931 goto out;
932
933 err = namelen + vir_namelen;
934 if (size == 0)
935 goto out;
936
937 names = __copy_xattr_names(ci, names);
938
939
940 err = namelen;
941 if (vxattrs) {
942 for (i = 0; vxattrs[i].name; i++) {
943 if (!vxattrs[i].hidden &&
944 !(vxattrs[i].exists_cb &&
945 !vxattrs[i].exists_cb(ci))) {
946 len = sprintf(names, "%s", vxattrs[i].name);
947 names += len + 1;
948 err += len + 1;
949 }
950 }
951 }
952
953out:
954 spin_unlock(&ci->i_ceph_lock);
955 return err;
956}
957
958static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
959 const char *value, size_t size, int flags)
960{
961 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
962 struct inode *inode = dentry->d_inode;
963 struct ceph_inode_info *ci = ceph_inode(inode);
964 struct ceph_mds_request *req;
965 struct ceph_mds_client *mdsc = fsc->mdsc;
966 struct ceph_pagelist *pagelist = NULL;
967 int op = CEPH_MDS_OP_SETXATTR;
968 int err;
969
970 if (size > 0) {
971
972 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
973 if (!pagelist)
974 return -ENOMEM;
975
976 ceph_pagelist_init(pagelist);
977 err = ceph_pagelist_append(pagelist, value, size);
978 if (err)
979 goto out;
980 } else if (!value) {
981 if (flags & CEPH_XATTR_REPLACE)
982 op = CEPH_MDS_OP_RMXATTR;
983 else
984 flags |= CEPH_XATTR_REMOVE;
985 }
986
987 dout("setxattr value=%.*s\n", (int)size, value);
988
989
990 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
991 if (IS_ERR(req)) {
992 err = PTR_ERR(req);
993 goto out;
994 }
995
996 req->r_path2 = kstrdup(name, GFP_NOFS);
997 if (!req->r_path2) {
998 ceph_mdsc_put_request(req);
999 err = -ENOMEM;
1000 goto out;
1001 }
1002
1003 if (op == CEPH_MDS_OP_SETXATTR) {
1004 req->r_args.setxattr.flags = cpu_to_le32(flags);
1005 req->r_pagelist = pagelist;
1006 pagelist = NULL;
1007 }
1008
1009 req->r_inode = inode;
1010 ihold(inode);
1011 req->r_num_caps = 1;
1012 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1013
1014 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
1015 err = ceph_mdsc_do_request(mdsc, NULL, req);
1016 ceph_mdsc_put_request(req);
1017 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
1018
1019out:
1020 if (pagelist)
1021 ceph_pagelist_release(pagelist);
1022 return err;
1023}
1024
1025int __ceph_setxattr(struct dentry *dentry, const char *name,
1026 const void *value, size_t size, int flags)
1027{
1028 struct inode *inode = dentry->d_inode;
1029 struct ceph_vxattr *vxattr;
1030 struct ceph_inode_info *ci = ceph_inode(inode);
1031 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
1032 struct ceph_cap_flush *prealloc_cf = NULL;
1033 int issued;
1034 int err;
1035 int dirty = 0;
1036 int name_len = strlen(name);
1037 int val_len = size;
1038 char *newname = NULL;
1039 char *newval = NULL;
1040 struct ceph_inode_xattr *xattr = NULL;
1041 int required_blob_size;
1042 bool check_realm = false;
1043 bool lock_snap_rwsem = false;
1044
1045 if (ceph_snap(inode) != CEPH_NOSNAP)
1046 return -EROFS;
1047
1048 if (!ceph_is_valid_xattr(name))
1049 return -EOPNOTSUPP;
1050
1051 vxattr = ceph_match_vxattr(inode, name);
1052 if (vxattr) {
1053 if (vxattr->readonly)
1054 return -EOPNOTSUPP;
1055 if (value && !strncmp(vxattr->name, "ceph.quota", 10))
1056 check_realm = true;
1057 }
1058
1059
1060 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1061 goto do_sync_unlocked;
1062
1063
1064 err = -ENOMEM;
1065 newname = kmemdup(name, name_len + 1, GFP_NOFS);
1066 if (!newname)
1067 goto out;
1068
1069 if (val_len) {
1070 newval = kmemdup(value, val_len, GFP_NOFS);
1071 if (!newval)
1072 goto out;
1073 }
1074
1075 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
1076 if (!xattr)
1077 goto out;
1078
1079 prealloc_cf = ceph_alloc_cap_flush();
1080 if (!prealloc_cf)
1081 goto out;
1082
1083 spin_lock(&ci->i_ceph_lock);
1084retry:
1085 issued = __ceph_caps_issued(ci, NULL);
1086 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1087 goto do_sync;
1088
1089 if (!lock_snap_rwsem && !ci->i_head_snapc) {
1090 lock_snap_rwsem = true;
1091 if (!down_read_trylock(&mdsc->snap_rwsem)) {
1092 spin_unlock(&ci->i_ceph_lock);
1093 down_read(&mdsc->snap_rwsem);
1094 spin_lock(&ci->i_ceph_lock);
1095 goto retry;
1096 }
1097 }
1098
1099 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
1100 __build_xattrs(inode);
1101
1102 required_blob_size = __get_required_blob_size(ci, name_len, val_len);
1103
1104 if (!ci->i_xattrs.prealloc_blob ||
1105 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1106 struct ceph_buffer *blob;
1107
1108 spin_unlock(&ci->i_ceph_lock);
1109 dout(" preaallocating new blob size=%d\n", required_blob_size);
1110 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1111 if (!blob)
1112 goto do_sync_unlocked;
1113 spin_lock(&ci->i_ceph_lock);
1114 if (ci->i_xattrs.prealloc_blob)
1115 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1116 ci->i_xattrs.prealloc_blob = blob;
1117 goto retry;
1118 }
1119
1120 err = __set_xattr(ci, newname, name_len, newval, val_len,
1121 flags, value ? 1 : -1, &xattr);
1122
1123 if (!err) {
1124 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1125 &prealloc_cf);
1126 ci->i_xattrs.dirty = true;
1127 inode->i_ctime = current_fs_time(inode->i_sb);
1128 }
1129
1130 spin_unlock(&ci->i_ceph_lock);
1131 if (lock_snap_rwsem)
1132 up_read(&mdsc->snap_rwsem);
1133 if (dirty)
1134 __mark_inode_dirty(inode, dirty);
1135 ceph_free_cap_flush(prealloc_cf);
1136 return err;
1137
1138do_sync:
1139 spin_unlock(&ci->i_ceph_lock);
1140do_sync_unlocked:
1141 if (lock_snap_rwsem)
1142 up_read(&mdsc->snap_rwsem);
1143
1144
1145 if (current->journal_info) {
1146 pr_warn_ratelimited("sync setxattr %p "
1147 "during filling trace\n", inode);
1148 err = -EBUSY;
1149 } else {
1150 err = ceph_sync_setxattr(dentry, name, value, size, flags);
1151 if (err >= 0 && check_realm) {
1152
1153 spin_lock(&ci->i_ceph_lock);
1154 if ((ci->i_max_files || ci->i_max_bytes) &&
1155 !(ci->i_snap_realm &&
1156 ci->i_snap_realm->ino == ci->i_vino.ino))
1157 err = -EOPNOTSUPP;
1158 spin_unlock(&ci->i_ceph_lock);
1159 }
1160 }
1161out:
1162 ceph_free_cap_flush(prealloc_cf);
1163 kfree(newname);
1164 kfree(newval);
1165 kfree(xattr);
1166 return err;
1167}
1168
1169int ceph_setxattr(struct dentry *dentry, const char *name,
1170 const void *value, size_t size, int flags)
1171{
1172 if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
1173 return -EROFS;
1174
1175 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1176 return generic_setxattr(dentry, name, value, size, flags);
1177
1178 if (size == 0)
1179 value = "";
1180
1181 return __ceph_setxattr(dentry, name, value, size, flags);
1182}
1183
1184static int ceph_send_removexattr(struct dentry *dentry, const char *name)
1185{
1186 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
1187 struct ceph_mds_client *mdsc = fsc->mdsc;
1188 struct inode *inode = dentry->d_inode;
1189 struct ceph_mds_request *req;
1190 int err;
1191
1192 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
1193 USE_AUTH_MDS);
1194 if (IS_ERR(req))
1195 return PTR_ERR(req);
1196 req->r_path2 = kstrdup(name, GFP_NOFS);
1197 if (!req->r_path2)
1198 return -ENOMEM;
1199
1200 req->r_inode = inode;
1201 ihold(inode);
1202 req->r_num_caps = 1;
1203 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1204 err = ceph_mdsc_do_request(mdsc, NULL, req);
1205 ceph_mdsc_put_request(req);
1206 return err;
1207}
1208
1209int __ceph_removexattr(struct dentry *dentry, const char *name)
1210{
1211 struct inode *inode = dentry->d_inode;
1212 struct ceph_vxattr *vxattr;
1213 struct ceph_inode_info *ci = ceph_inode(inode);
1214 struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
1215 struct ceph_cap_flush *prealloc_cf = NULL;
1216 int issued;
1217 int err;
1218 int required_blob_size;
1219 int dirty;
1220 bool lock_snap_rwsem = false;
1221
1222 if (!ceph_is_valid_xattr(name))
1223 return -EOPNOTSUPP;
1224
1225 vxattr = ceph_match_vxattr(inode, name);
1226 if (vxattr && vxattr->readonly)
1227 return -EOPNOTSUPP;
1228
1229
1230 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1231 goto do_sync_unlocked;
1232
1233 prealloc_cf = ceph_alloc_cap_flush();
1234 if (!prealloc_cf)
1235 return -ENOMEM;
1236
1237 err = -ENOMEM;
1238 spin_lock(&ci->i_ceph_lock);
1239retry:
1240 issued = __ceph_caps_issued(ci, NULL);
1241 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1242 goto do_sync;
1243
1244 if (!lock_snap_rwsem && !ci->i_head_snapc) {
1245 lock_snap_rwsem = true;
1246 if (!down_read_trylock(&mdsc->snap_rwsem)) {
1247 spin_unlock(&ci->i_ceph_lock);
1248 down_read(&mdsc->snap_rwsem);
1249 spin_lock(&ci->i_ceph_lock);
1250 goto retry;
1251 }
1252 }
1253
1254 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
1255
1256 __build_xattrs(inode);
1257
1258 required_blob_size = __get_required_blob_size(ci, 0, 0);
1259
1260 if (!ci->i_xattrs.prealloc_blob ||
1261 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1262 struct ceph_buffer *blob;
1263
1264 spin_unlock(&ci->i_ceph_lock);
1265 dout(" preaallocating new blob size=%d\n", required_blob_size);
1266 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1267 if (!blob)
1268 goto do_sync_unlocked;
1269 spin_lock(&ci->i_ceph_lock);
1270 if (ci->i_xattrs.prealloc_blob)
1271 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
1272 ci->i_xattrs.prealloc_blob = blob;
1273 goto retry;
1274 }
1275
1276 err = __remove_xattr_by_name(ceph_inode(inode), name);
1277
1278 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1279 &prealloc_cf);
1280 ci->i_xattrs.dirty = true;
1281 inode->i_ctime = current_fs_time(inode->i_sb);
1282 spin_unlock(&ci->i_ceph_lock);
1283 if (lock_snap_rwsem)
1284 up_read(&mdsc->snap_rwsem);
1285 if (dirty)
1286 __mark_inode_dirty(inode, dirty);
1287 ceph_free_cap_flush(prealloc_cf);
1288 return err;
1289do_sync:
1290 spin_unlock(&ci->i_ceph_lock);
1291do_sync_unlocked:
1292 if (lock_snap_rwsem)
1293 up_read(&mdsc->snap_rwsem);
1294 ceph_free_cap_flush(prealloc_cf);
1295 err = ceph_send_removexattr(dentry, name);
1296 return err;
1297}
1298
1299int ceph_removexattr(struct dentry *dentry, const char *name)
1300{
1301 if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
1302 return -EROFS;
1303
1304 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
1305 return generic_removexattr(dentry, name);
1306
1307 return __ceph_removexattr(dentry, name);
1308}
1309
1310#ifdef CONFIG_SECURITY
1311bool ceph_security_xattr_wanted(struct inode *in)
1312{
1313 return in->i_security != NULL;
1314}
1315
1316bool ceph_security_xattr_deadlock(struct inode *in)
1317{
1318 struct ceph_inode_info *ci;
1319 bool ret;
1320 if (!in->i_security)
1321 return false;
1322 ci = ceph_inode(in);
1323 spin_lock(&ci->i_ceph_lock);
1324 ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
1325 !(ci->i_xattrs.version > 0 &&
1326 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
1327 spin_unlock(&ci->i_ceph_lock);
1328 return ret;
1329}
1330#endif
1331