1
2
3
4
5
6
7
8
9
10#include <linux/sched.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/idr.h>
14#include <linux/slab.h>
15#include <linux/security.h>
16#include <linux/hash.h>
17
18#include "kernfs-internal.h"
19
/* Tree-wide rwsem: taken for write by code that adds/removes/renames nodes, for read by lookups. */
DECLARE_RWSEM(kernfs_rwsem);
/* Spinlock held while a node's ->parent, ->name or ->ns is read or switched (see kernfs_rename_ns()). */
static DEFINE_SPINLOCK(kernfs_rename_lock);
/* Shared scratch buffer used by the pr_cont_*() helpers and kernfs_walk_ns(), always under kernfs_rename_lock. */
static char kernfs_pr_cont_buf[PATH_MAX];
/* Spinlock taken around idr_alloc_cyclic(), idr_find() and the idr_remove() in kernfs_put() on a root's ino_idr. */
static DEFINE_SPINLOCK(kernfs_idr_lock);

/* Map an rb_node embedded as the ->rb member back to its containing kernfs_node. */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
26
27static bool kernfs_active(struct kernfs_node *kn)
28{
29 lockdep_assert_held(&kernfs_rwsem);
30 return atomic_read(&kn->active) >= 0;
31}
32
33static bool kernfs_lockdep(struct kernfs_node *kn)
34{
35#ifdef CONFIG_DEBUG_LOCK_ALLOC
36 return kn->flags & KERNFS_LOCKDEP;
37#else
38 return false;
39#endif
40}
41
42static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
43{
44 if (!kn)
45 return strlcpy(buf, "(null)", buflen);
46
47 return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
48}
49
50
51static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
52{
53 size_t depth = 0;
54
55 while (to->parent && to != from) {
56 depth++;
57 to = to->parent;
58 }
59 return depth;
60}
61
62static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
63 struct kernfs_node *b)
64{
65 size_t da, db;
66 struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
67
68 if (ra != rb)
69 return NULL;
70
71 da = kernfs_depth(ra->kn, a);
72 db = kernfs_depth(rb->kn, b);
73
74 while (da > db) {
75 a = a->parent;
76 da--;
77 }
78 while (db > da) {
79 b = b->parent;
80 db--;
81 }
82
83
84 while (b != a) {
85 b = b->parent;
86 a = a->parent;
87 }
88
89 return a;
90}
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
123 struct kernfs_node *kn_from,
124 char *buf, size_t buflen)
125{
126 struct kernfs_node *kn, *common;
127 const char parent_str[] = "/..";
128 size_t depth_from, depth_to, len = 0;
129 int i, j;
130
131 if (!kn_to)
132 return strlcpy(buf, "(null)", buflen);
133
134 if (!kn_from)
135 kn_from = kernfs_root(kn_to)->kn;
136
137 if (kn_from == kn_to)
138 return strlcpy(buf, "/", buflen);
139
140 if (!buf)
141 return -EINVAL;
142
143 common = kernfs_common_ancestor(kn_from, kn_to);
144 if (WARN_ON(!common))
145 return -EINVAL;
146
147 depth_to = kernfs_depth(common, kn_to);
148 depth_from = kernfs_depth(common, kn_from);
149
150 buf[0] = '\0';
151
152 for (i = 0; i < depth_from; i++)
153 len += strlcpy(buf + len, parent_str,
154 len < buflen ? buflen - len : 0);
155
156
157 for (i = depth_to - 1; i >= 0; i--) {
158 for (kn = kn_to, j = 0; j < i; j++)
159 kn = kn->parent;
160 len += strlcpy(buf + len, "/",
161 len < buflen ? buflen - len : 0);
162 len += strlcpy(buf + len, kn->name,
163 len < buflen ? buflen - len : 0);
164 }
165
166 return len;
167}
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
184{
185 unsigned long flags;
186 int ret;
187
188 spin_lock_irqsave(&kernfs_rename_lock, flags);
189 ret = kernfs_name_locked(kn, buf, buflen);
190 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
191 return ret;
192}
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
211 char *buf, size_t buflen)
212{
213 unsigned long flags;
214 int ret;
215
216 spin_lock_irqsave(&kernfs_rename_lock, flags);
217 ret = kernfs_path_from_node_locked(to, from, buf, buflen);
218 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
219 return ret;
220}
221EXPORT_SYMBOL_GPL(kernfs_path_from_node);
222
223
224
225
226
227
228
229void pr_cont_kernfs_name(struct kernfs_node *kn)
230{
231 unsigned long flags;
232
233 spin_lock_irqsave(&kernfs_rename_lock, flags);
234
235 kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
236 pr_cont("%s", kernfs_pr_cont_buf);
237
238 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
239}
240
241
242
243
244
245
246
247void pr_cont_kernfs_path(struct kernfs_node *kn)
248{
249 unsigned long flags;
250 int sz;
251
252 spin_lock_irqsave(&kernfs_rename_lock, flags);
253
254 sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
255 sizeof(kernfs_pr_cont_buf));
256 if (sz < 0) {
257 pr_cont("(error)");
258 goto out;
259 }
260
261 if (sz >= sizeof(kernfs_pr_cont_buf)) {
262 pr_cont("(name too long)");
263 goto out;
264 }
265
266 pr_cont("%s", kernfs_pr_cont_buf);
267
268out:
269 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
270}
271
272
273
274
275
276
277
278
279struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
280{
281 struct kernfs_node *parent;
282 unsigned long flags;
283
284 spin_lock_irqsave(&kernfs_rename_lock, flags);
285 parent = kn->parent;
286 kernfs_get(parent);
287 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
288
289 return parent;
290}
291
292
293
294
295
296
297
298
299static unsigned int kernfs_name_hash(const char *name, const void *ns)
300{
301 unsigned long hash = init_name_hash(ns);
302 unsigned int len = strlen(name);
303 while (len--)
304 hash = partial_name_hash(*name++, hash);
305 hash = end_name_hash(hash);
306 hash &= 0x7fffffffU;
307
308 if (hash < 2)
309 hash += 2;
310 if (hash >= INT_MAX)
311 hash = INT_MAX - 1;
312 return hash;
313}
314
315static int kernfs_name_compare(unsigned int hash, const char *name,
316 const void *ns, const struct kernfs_node *kn)
317{
318 if (hash < kn->hash)
319 return -1;
320 if (hash > kn->hash)
321 return 1;
322 if (ns < kn->ns)
323 return -1;
324 if (ns > kn->ns)
325 return 1;
326 return strcmp(name, kn->name);
327}
328
329static int kernfs_sd_compare(const struct kernfs_node *left,
330 const struct kernfs_node *right)
331{
332 return kernfs_name_compare(left->hash, left->name, left->ns, right);
333}
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348static int kernfs_link_sibling(struct kernfs_node *kn)
349{
350 struct rb_node **node = &kn->parent->dir.children.rb_node;
351 struct rb_node *parent = NULL;
352
353 while (*node) {
354 struct kernfs_node *pos;
355 int result;
356
357 pos = rb_to_kn(*node);
358 parent = *node;
359 result = kernfs_sd_compare(kn, pos);
360 if (result < 0)
361 node = &pos->rb.rb_left;
362 else if (result > 0)
363 node = &pos->rb.rb_right;
364 else
365 return -EEXIST;
366 }
367
368
369 rb_link_node(&kn->rb, parent, node);
370 rb_insert_color(&kn->rb, &kn->parent->dir.children);
371
372
373 if (kernfs_type(kn) == KERNFS_DIR)
374 kn->parent->dir.subdirs++;
375 kernfs_inc_rev(kn->parent);
376
377 return 0;
378}
379
380
381
382
383
384
385
386
387
388
389
390
391static bool kernfs_unlink_sibling(struct kernfs_node *kn)
392{
393 if (RB_EMPTY_NODE(&kn->rb))
394 return false;
395
396 if (kernfs_type(kn) == KERNFS_DIR)
397 kn->parent->dir.subdirs--;
398 kernfs_inc_rev(kn->parent);
399
400 rb_erase(&kn->rb, &kn->parent->dir.children);
401 RB_CLEAR_NODE(&kn->rb);
402 return true;
403}
404
405
406
407
408
409
410
411
412
413
414
415struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
416{
417 if (unlikely(!kn))
418 return NULL;
419
420 if (!atomic_inc_unless_negative(&kn->active))
421 return NULL;
422
423 if (kernfs_lockdep(kn))
424 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
425 return kn;
426}
427
428
429
430
431
432
433
434
435void kernfs_put_active(struct kernfs_node *kn)
436{
437 int v;
438
439 if (unlikely(!kn))
440 return;
441
442 if (kernfs_lockdep(kn))
443 rwsem_release(&kn->dep_map, _RET_IP_);
444 v = atomic_dec_return(&kn->active);
445 if (likely(v != KN_DEACTIVATED_BIAS))
446 return;
447
448 wake_up_all(&kernfs_root(kn)->deactivate_waitq);
449}
450
451
452
453
454
455
456
457
458
/*
 * kernfs_drain - wait until every active reference on @kn is gone
 * @kn: node to drain; expected to be inactive already (warns otherwise)
 *
 * Must be entered with kernfs_rwsem held for write.  The semaphore is
 * released while waiting and re-acquired before returning, so the tree
 * may change across this call.
 */
static void kernfs_drain(struct kernfs_node *kn)
	__releases(&kernfs_rwsem) __acquires(&kernfs_rwsem)
{
	struct kernfs_root *root = kernfs_root(kn);

	lockdep_assert_held_write(&kernfs_rwsem);
	WARN_ON_ONCE(kernfs_active(kn));

	/* Drop the tree lock so that holders of active refs can finish. */
	up_write(&kernfs_rwsem);

	/* Tell lockdep we are about to wait on @kn's dep_map. */
	if (kernfs_lockdep(kn)) {
		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
		if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
			lock_contended(&kn->dep_map, _RET_IP_);
	}

	/* kernfs_put_active() wakes this queue when the count reaches the bias. */
	wait_event(root->deactivate_waitq,
		   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

	if (kernfs_lockdep(kn)) {
		lock_acquired(&kn->dep_map, _RET_IP_);
		rwsem_release(&kn->dep_map, _RET_IP_);
	}

	kernfs_drain_open_files(kn);

	down_write(&kernfs_rwsem);
}
488
489
490
491
492
493void kernfs_get(struct kernfs_node *kn)
494{
495 if (kn) {
496 WARN_ON(!atomic_read(&kn->count));
497 atomic_inc(&kn->count);
498 }
499}
500EXPORT_SYMBOL_GPL(kernfs_get);
501
502
503
504
505
506
507
508void kernfs_put(struct kernfs_node *kn)
509{
510 struct kernfs_node *parent;
511 struct kernfs_root *root;
512
513 if (!kn || !atomic_dec_and_test(&kn->count))
514 return;
515 root = kernfs_root(kn);
516 repeat:
517
518
519
520
521 parent = kn->parent;
522
523 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
524 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
525 parent ? parent->name : "", kn->name, atomic_read(&kn->active));
526
527 if (kernfs_type(kn) == KERNFS_LINK)
528 kernfs_put(kn->symlink.target_kn);
529
530 kfree_const(kn->name);
531
532 if (kn->iattr) {
533 simple_xattrs_free(&kn->iattr->xattrs);
534 kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
535 }
536 spin_lock(&kernfs_idr_lock);
537 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
538 spin_unlock(&kernfs_idr_lock);
539 kmem_cache_free(kernfs_node_cache, kn);
540
541 kn = parent;
542 if (kn) {
543 if (atomic_dec_and_test(&kn->count))
544 goto repeat;
545 } else {
546
547 idr_destroy(&root->ino_idr);
548 kfree(root);
549 }
550}
551EXPORT_SYMBOL_GPL(kernfs_put);
552
553
554
555
556
557
558
559
560
561
562
563
564struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
565{
566 if (dentry->d_sb->s_op == &kernfs_sops)
567 return kernfs_dentry_node(dentry);
568 return NULL;
569}
570
571static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
572 struct kernfs_node *parent,
573 const char *name, umode_t mode,
574 kuid_t uid, kgid_t gid,
575 unsigned flags)
576{
577 struct kernfs_node *kn;
578 u32 id_highbits;
579 int ret;
580
581 name = kstrdup_const(name, GFP_KERNEL);
582 if (!name)
583 return NULL;
584
585 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
586 if (!kn)
587 goto err_out1;
588
589 idr_preload(GFP_KERNEL);
590 spin_lock(&kernfs_idr_lock);
591 ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
592 if (ret >= 0 && ret < root->last_id_lowbits)
593 root->id_highbits++;
594 id_highbits = root->id_highbits;
595 root->last_id_lowbits = ret;
596 spin_unlock(&kernfs_idr_lock);
597 idr_preload_end();
598 if (ret < 0)
599 goto err_out2;
600
601 kn->id = (u64)id_highbits << 32 | ret;
602
603 atomic_set(&kn->count, 1);
604 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
605 RB_CLEAR_NODE(&kn->rb);
606
607 kn->name = name;
608 kn->mode = mode;
609 kn->flags = flags;
610
611 if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
612 struct iattr iattr = {
613 .ia_valid = ATTR_UID | ATTR_GID,
614 .ia_uid = uid,
615 .ia_gid = gid,
616 };
617
618 ret = __kernfs_setattr(kn, &iattr);
619 if (ret < 0)
620 goto err_out3;
621 }
622
623 if (parent) {
624 ret = security_kernfs_init_security(parent, kn);
625 if (ret)
626 goto err_out3;
627 }
628
629 return kn;
630
631 err_out3:
632 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
633 err_out2:
634 kmem_cache_free(kernfs_node_cache, kn);
635 err_out1:
636 kfree_const(name);
637 return NULL;
638}
639
640struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
641 const char *name, umode_t mode,
642 kuid_t uid, kgid_t gid,
643 unsigned flags)
644{
645 struct kernfs_node *kn;
646
647 kn = __kernfs_new_node(kernfs_root(parent), parent,
648 name, mode, uid, gid, flags);
649 if (kn) {
650 kernfs_get(parent);
651 kn->parent = parent;
652 }
653 return kn;
654}
655
656
657
658
659
660
661
662
663
664
665
666
667struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
668 u64 id)
669{
670 struct kernfs_node *kn;
671 ino_t ino = kernfs_id_ino(id);
672 u32 gen = kernfs_id_gen(id);
673
674 spin_lock(&kernfs_idr_lock);
675
676 kn = idr_find(&root->ino_idr, (u32)ino);
677 if (!kn)
678 goto err_unlock;
679
680 if (sizeof(ino_t) >= sizeof(u64)) {
681
682 if (kernfs_ino(kn) != ino)
683 goto err_unlock;
684 } else {
685
686 if (unlikely(gen && kernfs_gen(kn) != gen))
687 goto err_unlock;
688 }
689
690
691
692
693
694
695 if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
696 !atomic_inc_not_zero(&kn->count)))
697 goto err_unlock;
698
699 spin_unlock(&kernfs_idr_lock);
700 return kn;
701err_unlock:
702 spin_unlock(&kernfs_idr_lock);
703 return NULL;
704}
705
706
707
708
709
710
711
712
713
714
715
716
717
718int kernfs_add_one(struct kernfs_node *kn)
719{
720 struct kernfs_node *parent = kn->parent;
721 struct kernfs_iattrs *ps_iattr;
722 bool has_ns;
723 int ret;
724
725 down_write(&kernfs_rwsem);
726
727 ret = -EINVAL;
728 has_ns = kernfs_ns_enabled(parent);
729 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
730 has_ns ? "required" : "invalid", parent->name, kn->name))
731 goto out_unlock;
732
733 if (kernfs_type(parent) != KERNFS_DIR)
734 goto out_unlock;
735
736 ret = -ENOENT;
737 if (parent->flags & KERNFS_EMPTY_DIR)
738 goto out_unlock;
739
740 if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
741 goto out_unlock;
742
743 kn->hash = kernfs_name_hash(kn->name, kn->ns);
744
745 ret = kernfs_link_sibling(kn);
746 if (ret)
747 goto out_unlock;
748
749
750 ps_iattr = parent->iattr;
751 if (ps_iattr) {
752 ktime_get_real_ts64(&ps_iattr->ia_ctime);
753 ps_iattr->ia_mtime = ps_iattr->ia_ctime;
754 }
755
756 up_write(&kernfs_rwsem);
757
758
759
760
761
762
763
764
765 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
766 kernfs_activate(kn);
767 return 0;
768
769out_unlock:
770 up_write(&kernfs_rwsem);
771 return ret;
772}
773
774
775
776
777
778
779
780
781
782
783static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
784 const unsigned char *name,
785 const void *ns)
786{
787 struct rb_node *node = parent->dir.children.rb_node;
788 bool has_ns = kernfs_ns_enabled(parent);
789 unsigned int hash;
790
791 lockdep_assert_held(&kernfs_rwsem);
792
793 if (has_ns != (bool)ns) {
794 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
795 has_ns ? "required" : "invalid", parent->name, name);
796 return NULL;
797 }
798
799 hash = kernfs_name_hash(name, ns);
800 while (node) {
801 struct kernfs_node *kn;
802 int result;
803
804 kn = rb_to_kn(node);
805 result = kernfs_name_compare(hash, name, ns, kn);
806 if (result < 0)
807 node = node->rb_left;
808 else if (result > 0)
809 node = node->rb_right;
810 else
811 return kn;
812 }
813 return NULL;
814}
815
816static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
817 const unsigned char *path,
818 const void *ns)
819{
820 size_t len;
821 char *p, *name;
822
823 lockdep_assert_held_read(&kernfs_rwsem);
824
825
826 spin_lock_irq(&kernfs_rename_lock);
827
828 len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
829
830 if (len >= sizeof(kernfs_pr_cont_buf)) {
831 spin_unlock_irq(&kernfs_rename_lock);
832 return NULL;
833 }
834
835 p = kernfs_pr_cont_buf;
836
837 while ((name = strsep(&p, "/")) && parent) {
838 if (*name == '\0')
839 continue;
840 parent = kernfs_find_ns(parent, name, ns);
841 }
842
843 spin_unlock_irq(&kernfs_rename_lock);
844
845 return parent;
846}
847
848
849
850
851
852
853
854
855
856
857
858struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
859 const char *name, const void *ns)
860{
861 struct kernfs_node *kn;
862
863 down_read(&kernfs_rwsem);
864 kn = kernfs_find_ns(parent, name, ns);
865 kernfs_get(kn);
866 up_read(&kernfs_rwsem);
867
868 return kn;
869}
870EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
871
872
873
874
875
876
877
878
879
880
881
882struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
883 const char *path, const void *ns)
884{
885 struct kernfs_node *kn;
886
887 down_read(&kernfs_rwsem);
888 kn = kernfs_walk_ns(parent, path, ns);
889 kernfs_get(kn);
890 up_read(&kernfs_rwsem);
891
892 return kn;
893}
894
895
896
897
898
899
900
901
902
903
904struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
905 unsigned int flags, void *priv)
906{
907 struct kernfs_root *root;
908 struct kernfs_node *kn;
909
910 root = kzalloc(sizeof(*root), GFP_KERNEL);
911 if (!root)
912 return ERR_PTR(-ENOMEM);
913
914 idr_init(&root->ino_idr);
915 INIT_LIST_HEAD(&root->supers);
916
917
918
919
920
921
922
923 if (sizeof(ino_t) >= sizeof(u64))
924 root->id_highbits = 0;
925 else
926 root->id_highbits = 1;
927
928 kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
929 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
930 KERNFS_DIR);
931 if (!kn) {
932 idr_destroy(&root->ino_idr);
933 kfree(root);
934 return ERR_PTR(-ENOMEM);
935 }
936
937 kn->priv = priv;
938 kn->dir.root = root;
939
940 root->syscall_ops = scops;
941 root->flags = flags;
942 root->kn = kn;
943 init_waitqueue_head(&root->deactivate_waitq);
944
945 if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
946 kernfs_activate(kn);
947
948 return root;
949}
950
951
952
953
954
955
956
957
/**
 * kernfs_destroy_root - tear down a kernfs hierarchy
 * @root: root to destroy
 *
 * Removes the root node (and with it everything below) via
 * kernfs_remove().  The root structure itself is freed by kernfs_put()
 * once the last reference on the root node is dropped.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
	kernfs_remove(root->kn);
}
962
963
964
965
966
967
968
969
970
971
972
973
974
975struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
976 const char *name, umode_t mode,
977 kuid_t uid, kgid_t gid,
978 void *priv, const void *ns)
979{
980 struct kernfs_node *kn;
981 int rc;
982
983
984 kn = kernfs_new_node(parent, name, mode | S_IFDIR,
985 uid, gid, KERNFS_DIR);
986 if (!kn)
987 return ERR_PTR(-ENOMEM);
988
989 kn->dir.root = parent->dir.root;
990 kn->ns = ns;
991 kn->priv = priv;
992
993
994 rc = kernfs_add_one(kn);
995 if (!rc)
996 return kn;
997
998 kernfs_put(kn);
999 return ERR_PTR(rc);
1000}
1001
1002
1003
1004
1005
1006
1007
1008
1009struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
1010 const char *name)
1011{
1012 struct kernfs_node *kn;
1013 int rc;
1014
1015
1016 kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
1017 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
1018 if (!kn)
1019 return ERR_PTR(-ENOMEM);
1020
1021 kn->flags |= KERNFS_EMPTY_DIR;
1022 kn->dir.root = parent->dir.root;
1023 kn->ns = NULL;
1024 kn->priv = NULL;
1025
1026
1027 rc = kernfs_add_one(kn);
1028 if (!rc)
1029 return kn;
1030
1031 kernfs_put(kn);
1032 return ERR_PTR(rc);
1033}
1034
/*
 * ->d_revalidate for kernfs dentries.
 *
 * Returns 1 if @dentry may still be used, 0 if it must be dropped and
 * looked up again, and -ECHILD when called in RCU-walk mode (which is
 * not handled here).
 */
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
{
	struct kernfs_node *kn;

	if (flags & LOOKUP_RCU)
		return -ECHILD;

	/* Negative dentry: it stays valid only while the parent dir is unchanged. */
	if (d_really_is_negative(dentry)) {
		struct kernfs_node *parent;

		/* d_lock is held while ->d_parent is dereferenced. */
		down_read(&kernfs_rwsem);
		spin_lock(&dentry->d_lock);
		parent = kernfs_dentry_node(dentry->d_parent);
		if (parent) {
			if (kernfs_dir_changed(parent, dentry)) {
				spin_unlock(&dentry->d_lock);
				up_read(&kernfs_rwsem);
				return 0;
			}
		}
		spin_unlock(&dentry->d_lock);
		up_read(&kernfs_rwsem);

		/* Parent unchanged (or no kernfs parent): keep the negative dentry. */
		return 1;
	}

	kn = kernfs_dentry_node(dentry);
	down_read(&kernfs_rwsem);

	/* The node must still be active ... */
	if (!kernfs_active(kn))
		goto out_bad;

	/* ... must still hang off the same parent ... */
	if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
		goto out_bad;

	/* ... must still carry the same name ... */
	if (strcmp(dentry->d_name.name, kn->name) != 0)
		goto out_bad;

	/* ... and, for namespaced parents, match this superblock's namespace. */
	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
	    kernfs_info(dentry->d_sb)->ns != kn->ns)
		goto out_bad;

	up_read(&kernfs_rwsem);
	return 1;
out_bad:
	up_read(&kernfs_rwsem);
	return 0;
}
1094
/* Dentry operations for kernfs: only revalidation is customised. */
const struct dentry_operations kernfs_dops = {
	.d_revalidate = kernfs_dop_revalidate,
};
1098
1099static struct dentry *kernfs_iop_lookup(struct inode *dir,
1100 struct dentry *dentry,
1101 unsigned int flags)
1102{
1103 struct kernfs_node *parent = dir->i_private;
1104 struct kernfs_node *kn;
1105 struct inode *inode = NULL;
1106 const void *ns = NULL;
1107
1108 down_read(&kernfs_rwsem);
1109 if (kernfs_ns_enabled(parent))
1110 ns = kernfs_info(dir->i_sb)->ns;
1111
1112 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
1113
1114 if (kn) {
1115
1116
1117
1118 if (!kernfs_active(kn)) {
1119 up_read(&kernfs_rwsem);
1120 return NULL;
1121 }
1122 inode = kernfs_get_inode(dir->i_sb, kn);
1123 if (!inode)
1124 inode = ERR_PTR(-ENOMEM);
1125 }
1126
1127
1128
1129
1130
1131
1132 if (!IS_ERR(inode))
1133 kernfs_set_rev(parent, dentry);
1134 up_read(&kernfs_rwsem);
1135
1136
1137 return d_splice_alias(inode, dentry);
1138}
1139
1140static int kernfs_iop_mkdir(struct user_namespace *mnt_userns,
1141 struct inode *dir, struct dentry *dentry,
1142 umode_t mode)
1143{
1144 struct kernfs_node *parent = dir->i_private;
1145 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
1146 int ret;
1147
1148 if (!scops || !scops->mkdir)
1149 return -EPERM;
1150
1151 if (!kernfs_get_active(parent))
1152 return -ENODEV;
1153
1154 ret = scops->mkdir(parent, dentry->d_name.name, mode);
1155
1156 kernfs_put_active(parent);
1157 return ret;
1158}
1159
1160static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
1161{
1162 struct kernfs_node *kn = kernfs_dentry_node(dentry);
1163 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1164 int ret;
1165
1166 if (!scops || !scops->rmdir)
1167 return -EPERM;
1168
1169 if (!kernfs_get_active(kn))
1170 return -ENODEV;
1171
1172 ret = scops->rmdir(kn);
1173
1174 kernfs_put_active(kn);
1175 return ret;
1176}
1177
1178static int kernfs_iop_rename(struct user_namespace *mnt_userns,
1179 struct inode *old_dir, struct dentry *old_dentry,
1180 struct inode *new_dir, struct dentry *new_dentry,
1181 unsigned int flags)
1182{
1183 struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
1184 struct kernfs_node *new_parent = new_dir->i_private;
1185 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1186 int ret;
1187
1188 if (flags)
1189 return -EINVAL;
1190
1191 if (!scops || !scops->rename)
1192 return -EPERM;
1193
1194 if (!kernfs_get_active(kn))
1195 return -ENODEV;
1196
1197 if (!kernfs_get_active(new_parent)) {
1198 kernfs_put_active(kn);
1199 return -ENODEV;
1200 }
1201
1202 ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
1203
1204 kernfs_put_active(new_parent);
1205 kernfs_put_active(kn);
1206 return ret;
1207}
1208
/*
 * Inode operations for kernfs directories.  mkdir/rmdir/rename are
 * forwarded to the hierarchy's kernfs_syscall_ops by the handlers above.
 */
const struct inode_operations kernfs_dir_iops = {
	.lookup = kernfs_iop_lookup,
	.permission = kernfs_iop_permission,
	.setattr = kernfs_iop_setattr,
	.getattr = kernfs_iop_getattr,
	.listxattr = kernfs_iop_listxattr,

	.mkdir = kernfs_iop_mkdir,
	.rmdir = kernfs_iop_rmdir,
	.rename = kernfs_iop_rename,
};
1220
1221static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
1222{
1223 struct kernfs_node *last;
1224
1225 while (true) {
1226 struct rb_node *rbn;
1227
1228 last = pos;
1229
1230 if (kernfs_type(pos) != KERNFS_DIR)
1231 break;
1232
1233 rbn = rb_first(&pos->dir.children);
1234 if (!rbn)
1235 break;
1236
1237 pos = rb_to_kn(rbn);
1238 }
1239
1240 return last;
1241}
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
1253 struct kernfs_node *root)
1254{
1255 struct rb_node *rbn;
1256
1257 lockdep_assert_held_write(&kernfs_rwsem);
1258
1259
1260 if (!pos)
1261 return kernfs_leftmost_descendant(root);
1262
1263
1264 if (pos == root)
1265 return NULL;
1266
1267
1268 rbn = rb_next(&pos->rb);
1269 if (rbn)
1270 return kernfs_leftmost_descendant(rb_to_kn(rbn));
1271
1272
1273 return pos->parent;
1274}
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289void kernfs_activate(struct kernfs_node *kn)
1290{
1291 struct kernfs_node *pos;
1292
1293 down_write(&kernfs_rwsem);
1294
1295 pos = NULL;
1296 while ((pos = kernfs_next_descendant_post(pos, kn))) {
1297 if (pos->flags & KERNFS_ACTIVATED)
1298 continue;
1299
1300 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1301 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1302
1303 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1304 pos->flags |= KERNFS_ACTIVATED;
1305 }
1306
1307 up_write(&kernfs_rwsem);
1308}
1309
/*
 * __kernfs_remove - deactivate, drain and unlink @kn and its subtree
 * @kn: top of the subtree to remove (NULL is a no-op)
 *
 * Caller must hold kernfs_rwsem for write.  The lock may be dropped and
 * re-taken inside kernfs_drain(), so the tree can change while this
 * function runs.
 */
static void __kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_node *pos;

	lockdep_assert_held_write(&kernfs_rwsem);

	/*
	 * Nothing to do for NULL, or for a non-root node that is no longer
	 * linked into its parent (removal already finished).
	 */
	if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
		return;

	pr_debug("kernfs %s: removing\n", kn->name);

	/* Pass 1: push every still-active node in the subtree below zero. */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn)))
		if (kernfs_active(pos))
			atomic_add(KN_DEACTIVATED_BIAS, &pos->active);

	/* Pass 2: repeatedly take the leftmost leaf, drain it and unlink it. */
	do {
		pos = kernfs_leftmost_descendant(kn);

		/*
		 * Pin @pos: kernfs_drain() drops kernfs_rwsem, so @pos must
		 * not go away underneath us.
		 */
		kernfs_get(pos);

		/*
		 * Only drain if @kn was ever activated; otherwise its active
		 * count is expected to still sit at the bias.
		 */
		if (kn->flags & KERNFS_ACTIVATED)
			kernfs_drain(pos);
		else
			WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);

		/*
		 * kernfs_unlink_sibling() returns false if @pos was already
		 * unlinked (e.g. while the lock was dropped); only the path
		 * that actually unlinks drops the extra reference below.
		 */
		if (!pos->parent || kernfs_unlink_sibling(pos)) {
			struct kernfs_iattrs *ps_iattr =
				pos->parent ? pos->parent->iattr : NULL;

			/* Update the parent's timestamps if it tracks them. */
			if (ps_iattr) {
				ktime_get_real_ts64(&ps_iattr->ia_ctime);
				ps_iattr->ia_mtime = ps_iattr->ia_ctime;
			}

			kernfs_put(pos);
		}

		/* Drop the pin taken at the top of the loop. */
		kernfs_put(pos);
	} while (pos != kn);
}
1375
1376
1377
1378
1379
1380
1381
/**
 * kernfs_remove - remove a node together with its whole subtree
 * @kn: node to remove (NULL is accepted and ignored by __kernfs_remove())
 *
 * Locked wrapper: runs __kernfs_remove() with kernfs_rwsem held for write.
 */
void kernfs_remove(struct kernfs_node *kn)
{
	down_write(&kernfs_rwsem);
	__kernfs_remove(kn);
	up_write(&kernfs_rwsem);
}
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
/**
 * kernfs_break_active_protection - temporarily give up an active reference
 * @kn: node whose active reference the caller currently holds
 *
 * Implemented as a plain kernfs_put_active(); the counterpart that
 * restores the reference is kernfs_unbreak_active_protection().
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Drop the active reference; the node pointer itself stays in the
	 * caller's hands.
	 */
	kernfs_put_active(kn);
}
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
/**
 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
 * @kn: node passed to kernfs_break_active_protection() earlier
 *
 * Re-takes the active reference unconditionally: unlike
 * kernfs_get_active() the counter is incremented even when it is
 * negative, i.e. even if the node has been deactivated in the meantime.
 */
void kernfs_unbreak_active_protection(struct kernfs_node *kn)
{
	/*
	 * Plain atomic_inc() rather than atomic_inc_unless_negative():
	 * this must balance the earlier put regardless of the node's state.
	 */
	atomic_inc(&kn->active);
	if (kernfs_lockdep(kn))
		rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
}
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
/**
 * kernfs_remove_self - remove a node from one of its own operations
 * @kn: node to remove; the caller is expected to hold an active
 *      reference on it, which is dropped and re-taken internally
 *
 * The first caller to set KERNFS_SUICIDAL performs the removal and gets
 * true.  Any other caller waits until the removal has completed and
 * gets false.
 */
bool kernfs_remove_self(struct kernfs_node *kn)
{
	bool ret;

	down_write(&kernfs_rwsem);
	kernfs_break_active_protection(kn);

	/*
	 * KERNFS_SUICIDAL marks that somebody has started self-removal;
	 * KERNFS_SUICIDED marks that __kernfs_remove() has returned.
	 */
	if (!(kn->flags & KERNFS_SUICIDAL)) {
		kn->flags |= KERNFS_SUICIDAL;
		__kernfs_remove(kn);
		kn->flags |= KERNFS_SUICIDED;
		ret = true;
	} else {
		wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
		DEFINE_WAIT(wait);

		/* Sleep, with kernfs_rwsem dropped, until the winner is done. */
		while (true) {
			prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);

			if ((kn->flags & KERNFS_SUICIDED) &&
			    atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
				break;

			up_write(&kernfs_rwsem);
			schedule();
			down_write(&kernfs_rwsem);
		}
		finish_wait(waitq, &wait);
		WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
		ret = false;
	}

	/*
	 * Restore the active reference dropped above so that the caller's
	 * own release stays balanced.
	 */
	kernfs_unbreak_active_protection(kn);

	up_write(&kernfs_rwsem);
	return ret;
}
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1530 const void *ns)
1531{
1532 struct kernfs_node *kn;
1533
1534 if (!parent) {
1535 WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1536 name);
1537 return -ENOENT;
1538 }
1539
1540 down_write(&kernfs_rwsem);
1541
1542 kn = kernfs_find_ns(parent, name, ns);
1543 if (kn)
1544 __kernfs_remove(kn);
1545
1546 up_write(&kernfs_rwsem);
1547
1548 if (kn)
1549 return 0;
1550 else
1551 return -ENOENT;
1552}
1553
1554
1555
1556
1557
1558
1559
1560
/**
 * kernfs_rename_ns - move and/or rename a node
 * @kn: node to move; must have a parent
 * @new_parent: directory to move @kn into
 * @new_name: name @kn should carry afterwards
 * @new_ns: namespace tag @kn should carry afterwards
 *
 * Returns 0 on success (including the no-op case where nothing
 * changes), -EINVAL if @kn has no parent, -ENOENT if @kn or
 * @new_parent is inactive or @new_parent is an empty dir, -EEXIST if
 * the target name is taken, -ENOMEM if the new name cannot be copied.
 */
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
		     const char *new_name, const void *new_ns)
{
	struct kernfs_node *old_parent;
	const char *old_name = NULL;
	int error;

	/* A node without a parent cannot be renamed. */
	if (!kn->parent)
		return -EINVAL;

	down_write(&kernfs_rwsem);

	error = -ENOENT;
	if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
	    (new_parent->flags & KERNFS_EMPTY_DIR))
		goto out;

	/* Nothing to do if parent, namespace and name are all unchanged. */
	error = 0;
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
		goto out;

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
		goto out;

	/* Copy the name only if it really changes; NULL means "keep the old one". */
	if (strcmp(kn->name, new_name) != 0) {
		error = -ENOMEM;
		new_name = kstrdup_const(new_name, GFP_KERNEL);
		if (!new_name)
			goto out;
	} else {
		new_name = NULL;
	}

	/*
	 * Take @kn out of the old parent's tree and pin the new parent
	 * before ->parent is switched over.
	 */
	kernfs_unlink_sibling(kn);
	kernfs_get(new_parent);

	/* ->parent, ->ns and ->name are switched under kernfs_rename_lock. */
	spin_lock_irq(&kernfs_rename_lock);

	old_parent = kn->parent;
	kn->parent = new_parent;

	kn->ns = new_ns;
	if (new_name) {
		old_name = kn->name;
		kn->name = new_name;
	}

	spin_unlock_irq(&kernfs_rename_lock);

	/* Re-hash with the new key and link into the new parent. */
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
	kernfs_link_sibling(kn);

	/* Release what the node held before the move. */
	kernfs_put(old_parent);
	kfree_const(old_name);

	error = 0;
 out:
	up_write(&kernfs_rwsem);
	return error;
}
1629
1630
1631static inline unsigned char dt_type(struct kernfs_node *kn)
1632{
1633 return (kn->mode >> 12) & 15;
1634}
1635
1636static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1637{
1638 kernfs_put(filp->private_data);
1639 return 0;
1640}
1641
1642static struct kernfs_node *kernfs_dir_pos(const void *ns,
1643 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1644{
1645 if (pos) {
1646 int valid = kernfs_active(pos) &&
1647 pos->parent == parent && hash == pos->hash;
1648 kernfs_put(pos);
1649 if (!valid)
1650 pos = NULL;
1651 }
1652 if (!pos && (hash > 1) && (hash < INT_MAX)) {
1653 struct rb_node *node = parent->dir.children.rb_node;
1654 while (node) {
1655 pos = rb_to_kn(node);
1656
1657 if (hash < pos->hash)
1658 node = node->rb_left;
1659 else if (hash > pos->hash)
1660 node = node->rb_right;
1661 else
1662 break;
1663 }
1664 }
1665
1666 while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1667 struct rb_node *node = rb_next(&pos->rb);
1668 if (!node)
1669 pos = NULL;
1670 else
1671 pos = rb_to_kn(node);
1672 }
1673 return pos;
1674}
1675
1676static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1677 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1678{
1679 pos = kernfs_dir_pos(ns, parent, ino, pos);
1680 if (pos) {
1681 do {
1682 struct rb_node *node = rb_next(&pos->rb);
1683 if (!node)
1684 pos = NULL;
1685 else
1686 pos = rb_to_kn(node);
1687 } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1688 }
1689 return pos;
1690}
1691
/*
 * ->iterate_shared for kernfs directories.
 *
 * Emits "." and ".." followed by the visible children.  ctx->pos is set
 * to the name hash of the entry being emitted, and the node itself is
 * kept (with a reference) in file->private_data as a cursor for the
 * next call.  Always returns 0.
 */
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct kernfs_node *parent = kernfs_dentry_node(dentry);
	struct kernfs_node *pos = file->private_data;
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
	down_read(&kernfs_rwsem);

	if (kernfs_ns_enabled(parent))
		ns = kernfs_info(dentry->d_sb)->ns;

	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
	     pos;
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
		const char *name = pos->name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = kernfs_ino(pos);

		/* Save the cursor and pin it before the lock is dropped. */
		ctx->pos = pos->hash;
		file->private_data = pos;
		kernfs_get(pos);

		/* dir_emit() runs without kernfs_rwsem held. */
		up_read(&kernfs_rwsem);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		down_read(&kernfs_rwsem);
	}
	up_read(&kernfs_rwsem);
	/* End of directory: clear the cursor and park the offset at INT_MAX. */
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}
1728
/* File operations for kernfs directories; release drops the readdir cursor reference. */
const struct file_operations kernfs_dir_fops = {
	.read = generic_read_dir,
	.iterate_shared = kernfs_fop_readdir,
	.release = kernfs_dir_fop_release,
	.llseek = generic_file_llseek,
};
1735