1
2
3
4
5#include <linux/capability.h>
6#include <linux/audit.h>
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <linux/lsm_hooks.h>
10#include <linux/file.h>
11#include <linux/mm.h>
12#include <linux/mman.h>
13#include <linux/pagemap.h>
14#include <linux/swap.h>
15#include <linux/skbuff.h>
16#include <linux/netlink.h>
17#include <linux/ptrace.h>
18#include <linux/xattr.h>
19#include <linux/hugetlb.h>
20#include <linux/mount.h>
21#include <linux/sched.h>
22#include <linux/prctl.h>
23#include <linux/securebits.h>
24#include <linux/user_namespace.h>
25#include <linux/binfmts.h>
26#include <linux/personality.h>
27#include <linux/mnt_idmapping.h>
28
29
30
31
32
33
34
35
36
37
38
39
40static void warn_setuid_and_fcaps_mixed(const char *fname)
41{
42 static int warned;
43 if (!warned) {
44 printk(KERN_INFO "warning: `%s' has both setuid-root and"
45 " effective capabilities. Therefore not raising all"
46 " capabilities.\n", fname);
47 warned = 1;
48 }
49}
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
67 int cap, unsigned int opts)
68{
69 struct user_namespace *ns = targ_ns;
70
71
72
73
74
75 for (;;) {
76
77 if (ns == cred->user_ns)
78 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
79
80
81
82
83
84 if (ns->level <= cred->user_ns->level)
85 return -EPERM;
86
87
88
89
90
91 if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
92 return 0;
93
94
95
96
97
98 ns = ns->parent;
99 }
100
101
102}
103
104
105
106
107
108
109
110
111
112int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
113{
114 if (!capable(CAP_SYS_TIME))
115 return -EPERM;
116 return 0;
117}
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
135{
136 int ret = 0;
137 const struct cred *cred, *child_cred;
138 const kernel_cap_t *caller_caps;
139
140 rcu_read_lock();
141 cred = current_cred();
142 child_cred = __task_cred(child);
143 if (mode & PTRACE_MODE_FSCREDS)
144 caller_caps = &cred->cap_effective;
145 else
146 caller_caps = &cred->cap_permitted;
147 if (cred->user_ns == child_cred->user_ns &&
148 cap_issubset(child_cred->cap_permitted, *caller_caps))
149 goto out;
150 if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
151 goto out;
152 ret = -EPERM;
153out:
154 rcu_read_unlock();
155 return ret;
156}
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171int cap_ptrace_traceme(struct task_struct *parent)
172{
173 int ret = 0;
174 const struct cred *cred, *child_cred;
175
176 rcu_read_lock();
177 cred = __task_cred(parent);
178 child_cred = current_cred();
179 if (cred->user_ns == child_cred->user_ns &&
180 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
181 goto out;
182 if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
183 goto out;
184 ret = -EPERM;
185out:
186 rcu_read_unlock();
187 return ret;
188}
189
190
191
192
193
194
195
196
197
198
199
200int cap_capget(const struct task_struct *target, kernel_cap_t *effective,
201 kernel_cap_t *inheritable, kernel_cap_t *permitted)
202{
203 const struct cred *cred;
204
205
206 rcu_read_lock();
207 cred = __task_cred(target);
208 *effective = cred->cap_effective;
209 *inheritable = cred->cap_inheritable;
210 *permitted = cred->cap_permitted;
211 rcu_read_unlock();
212 return 0;
213}
214
215
216
217
218
219static inline int cap_inh_is_capped(void)
220{
221
222
223
224 if (cap_capable(current_cred(), current_cred()->user_ns,
225 CAP_SETPCAP, CAP_OPT_NONE) == 0)
226 return 0;
227 return 1;
228}
229
230
231
232
233
234
235
236
237
238
239
240
241
242int cap_capset(struct cred *new,
243 const struct cred *old,
244 const kernel_cap_t *effective,
245 const kernel_cap_t *inheritable,
246 const kernel_cap_t *permitted)
247{
248 if (cap_inh_is_capped() &&
249 !cap_issubset(*inheritable,
250 cap_combine(old->cap_inheritable,
251 old->cap_permitted)))
252
253 return -EPERM;
254
255 if (!cap_issubset(*inheritable,
256 cap_combine(old->cap_inheritable,
257 old->cap_bset)))
258
259 return -EPERM;
260
261
262 if (!cap_issubset(*permitted, old->cap_permitted))
263 return -EPERM;
264
265
266 if (!cap_issubset(*effective, *permitted))
267 return -EPERM;
268
269 new->cap_effective = *effective;
270 new->cap_inheritable = *inheritable;
271 new->cap_permitted = *permitted;
272
273
274
275
276
277 new->cap_ambient = cap_intersect(new->cap_ambient,
278 cap_intersect(*permitted,
279 *inheritable));
280 if (WARN_ON(!cap_ambient_invariant_ok(new)))
281 return -EINVAL;
282 return 0;
283}
284
285
286
287
288
289
290
291
292
293
294
295
296int cap_inode_need_killpriv(struct dentry *dentry)
297{
298 struct inode *inode = d_backing_inode(dentry);
299 int error;
300
301 error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
302 return error > 0;
303}
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry)
322{
323 int error;
324
325 error = __vfs_removexattr(idmap, dentry, XATTR_NAME_CAPS);
326 if (error == -EOPNOTSUPP)
327 error = 0;
328 return error;
329}
330
331static bool rootid_owns_currentns(vfsuid_t rootvfsuid)
332{
333 struct user_namespace *ns;
334 kuid_t kroot;
335
336 if (!vfsuid_valid(rootvfsuid))
337 return false;
338
339 kroot = vfsuid_into_kuid(rootvfsuid);
340 for (ns = current_user_ns();; ns = ns->parent) {
341 if (from_kuid(ns, kroot) == 0)
342 return true;
343 if (ns == &init_user_ns)
344 break;
345 }
346
347 return false;
348}
349
350static __u32 sansflags(__u32 m)
351{
352 return m & ~VFS_CAP_FLAGS_EFFECTIVE;
353}
354
355static bool is_v2header(int size, const struct vfs_cap_data *cap)
356{
357 if (size != XATTR_CAPS_SZ_2)
358 return false;
359 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
360}
361
362static bool is_v3header(int size, const struct vfs_cap_data *cap)
363{
364 if (size != XATTR_CAPS_SZ_3)
365 return false;
366 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
367}
368
369
370
371
372
373
374
375
376
377
378
379
380int cap_inode_getsecurity(struct mnt_idmap *idmap,
381 struct inode *inode, const char *name, void **buffer,
382 bool alloc)
383{
384 int size;
385 kuid_t kroot;
386 vfsuid_t vfsroot;
387 u32 nsmagic, magic;
388 uid_t root, mappedroot;
389 char *tmpbuf = NULL;
390 struct vfs_cap_data *cap;
391 struct vfs_ns_cap_data *nscap = NULL;
392 struct dentry *dentry;
393 struct user_namespace *fs_ns;
394
395 if (strcmp(name, "capability") != 0)
396 return -EOPNOTSUPP;
397
398 dentry = d_find_any_alias(inode);
399 if (!dentry)
400 return -EINVAL;
401 size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf,
402 sizeof(struct vfs_ns_cap_data), GFP_NOFS);
403 dput(dentry);
404
405 if (size < 0 || !tmpbuf)
406 goto out_free;
407
408 fs_ns = inode->i_sb->s_user_ns;
409 cap = (struct vfs_cap_data *) tmpbuf;
410 if (is_v2header(size, cap)) {
411 root = 0;
412 } else if (is_v3header(size, cap)) {
413 nscap = (struct vfs_ns_cap_data *) tmpbuf;
414 root = le32_to_cpu(nscap->rootid);
415 } else {
416 size = -EINVAL;
417 goto out_free;
418 }
419
420 kroot = make_kuid(fs_ns, root);
421
422
423 vfsroot = make_vfsuid(idmap, fs_ns, kroot);
424
425
426
427 mappedroot = from_kuid(current_user_ns(), vfsuid_into_kuid(vfsroot));
428 if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
429 size = sizeof(struct vfs_ns_cap_data);
430 if (alloc) {
431 if (!nscap) {
432
433 nscap = kzalloc(size, GFP_ATOMIC);
434 if (!nscap) {
435 size = -ENOMEM;
436 goto out_free;
437 }
438 nsmagic = VFS_CAP_REVISION_3;
439 magic = le32_to_cpu(cap->magic_etc);
440 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
441 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
442 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
443 nscap->magic_etc = cpu_to_le32(nsmagic);
444 } else {
445
446 tmpbuf = NULL;
447 }
448 nscap->rootid = cpu_to_le32(mappedroot);
449 *buffer = nscap;
450 }
451 goto out_free;
452 }
453
454 if (!rootid_owns_currentns(vfsroot)) {
455 size = -EOVERFLOW;
456 goto out_free;
457 }
458
459
460 size = sizeof(struct vfs_cap_data);
461 if (alloc) {
462 if (nscap) {
463
464 cap = kzalloc(size, GFP_ATOMIC);
465 if (!cap) {
466 size = -ENOMEM;
467 goto out_free;
468 }
469 magic = VFS_CAP_REVISION_2;
470 nsmagic = le32_to_cpu(nscap->magic_etc);
471 if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
472 magic |= VFS_CAP_FLAGS_EFFECTIVE;
473 memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
474 cap->magic_etc = cpu_to_le32(magic);
475 } else {
476
477 tmpbuf = NULL;
478 }
479 *buffer = cap;
480 }
481out_free:
482 kfree(tmpbuf);
483 return size;
484}
485
486
487
488
489
490
491
492
493static vfsuid_t rootid_from_xattr(const void *value, size_t size,
494 struct user_namespace *task_ns)
495{
496 const struct vfs_ns_cap_data *nscap = value;
497 uid_t rootid = 0;
498
499 if (size == XATTR_CAPS_SZ_3)
500 rootid = le32_to_cpu(nscap->rootid);
501
502 return VFSUIDT_INIT(make_kuid(task_ns, rootid));
503}
504
505static bool validheader(size_t size, const struct vfs_cap_data *cap)
506{
507 return is_v2header(size, cap) || is_v3header(size, cap);
508}
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
530 const void **ivalue, size_t size)
531{
532 struct vfs_ns_cap_data *nscap;
533 uid_t nsrootid;
534 const struct vfs_cap_data *cap = *ivalue;
535 __u32 magic, nsmagic;
536 struct inode *inode = d_backing_inode(dentry);
537 struct user_namespace *task_ns = current_user_ns(),
538 *fs_ns = inode->i_sb->s_user_ns;
539 kuid_t rootid;
540 vfsuid_t vfsrootid;
541 size_t newsize;
542
543 if (!*ivalue)
544 return -EINVAL;
545 if (!validheader(size, cap))
546 return -EINVAL;
547 if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
548 return -EPERM;
549 if (size == XATTR_CAPS_SZ_2 && (idmap == &nop_mnt_idmap))
550 if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
551
552 return size;
553
554 vfsrootid = rootid_from_xattr(*ivalue, size, task_ns);
555 if (!vfsuid_valid(vfsrootid))
556 return -EINVAL;
557
558 rootid = from_vfsuid(idmap, fs_ns, vfsrootid);
559 if (!uid_valid(rootid))
560 return -EINVAL;
561
562 nsrootid = from_kuid(fs_ns, rootid);
563 if (nsrootid == -1)
564 return -EINVAL;
565
566 newsize = sizeof(struct vfs_ns_cap_data);
567 nscap = kmalloc(newsize, GFP_ATOMIC);
568 if (!nscap)
569 return -ENOMEM;
570 nscap->rootid = cpu_to_le32(nsrootid);
571 nsmagic = VFS_CAP_REVISION_3;
572 magic = le32_to_cpu(cap->magic_etc);
573 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
574 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
575 nscap->magic_etc = cpu_to_le32(nsmagic);
576 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
577
578 *ivalue = nscap;
579 return newsize;
580}
581
582
583
584
585
586static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
587 struct linux_binprm *bprm,
588 bool *effective,
589 bool *has_fcap)
590{
591 struct cred *new = bprm->cred;
592 int ret = 0;
593
594 if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
595 *effective = true;
596
597 if (caps->magic_etc & VFS_CAP_REVISION_MASK)
598 *has_fcap = true;
599
600
601
602
603
604 new->cap_permitted.val =
605 (new->cap_bset.val & caps->permitted.val) |
606 (new->cap_inheritable.val & caps->inheritable.val);
607
608 if (caps->permitted.val & ~new->cap_permitted.val)
609
610 ret = -EPERM;
611
612
613
614
615
616
617 return *effective ? ret : 0;
618}
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
636 const struct dentry *dentry,
637 struct cpu_vfs_cap_data *cpu_caps)
638{
639 struct inode *inode = d_backing_inode(dentry);
640 __u32 magic_etc;
641 int size;
642 struct vfs_ns_cap_data data, *nscaps = &data;
643 struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
644 kuid_t rootkuid;
645 vfsuid_t rootvfsuid;
646 struct user_namespace *fs_ns;
647
648 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
649
650 if (!inode)
651 return -ENODATA;
652
653 fs_ns = inode->i_sb->s_user_ns;
654 size = __vfs_getxattr((struct dentry *)dentry, inode,
655 XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
656 if (size == -ENODATA || size == -EOPNOTSUPP)
657
658 return -ENODATA;
659
660 if (size < 0)
661 return size;
662
663 if (size < sizeof(magic_etc))
664 return -EINVAL;
665
666 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
667
668 rootkuid = make_kuid(fs_ns, 0);
669 switch (magic_etc & VFS_CAP_REVISION_MASK) {
670 case VFS_CAP_REVISION_1:
671 if (size != XATTR_CAPS_SZ_1)
672 return -EINVAL;
673 break;
674 case VFS_CAP_REVISION_2:
675 if (size != XATTR_CAPS_SZ_2)
676 return -EINVAL;
677 break;
678 case VFS_CAP_REVISION_3:
679 if (size != XATTR_CAPS_SZ_3)
680 return -EINVAL;
681 rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
682 break;
683
684 default:
685 return -EINVAL;
686 }
687
688 rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid);
689 if (!vfsuid_valid(rootvfsuid))
690 return -ENODATA;
691
692
693
694
695 if (!rootid_owns_currentns(rootvfsuid))
696 return -ENODATA;
697
698 cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
699 cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
700
701
702
703
704
705 if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
706 cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
707 cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
708 }
709
710 cpu_caps->permitted.val &= CAP_VALID_MASK;
711 cpu_caps->inheritable.val &= CAP_VALID_MASK;
712
713 cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);
714
715 return 0;
716}
717
718
719
720
721
722
723static int get_file_caps(struct linux_binprm *bprm, struct file *file,
724 bool *effective, bool *has_fcap)
725{
726 int rc = 0;
727 struct cpu_vfs_cap_data vcaps;
728
729 cap_clear(bprm->cred->cap_permitted);
730
731 if (!file_caps_enabled)
732 return 0;
733
734 if (!mnt_may_suid(file->f_path.mnt))
735 return 0;
736
737
738
739
740
741
742 if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
743 return 0;
744
745 rc = get_vfs_caps_from_disk(file_mnt_idmap(file),
746 file->f_path.dentry, &vcaps);
747 if (rc < 0) {
748 if (rc == -EINVAL)
749 printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
750 bprm->filename);
751 else if (rc == -ENODATA)
752 rc = 0;
753 goto out;
754 }
755
756 rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
757
758out:
759 if (rc)
760 cap_clear(bprm->cred->cap_permitted);
761
762 return rc;
763}
764
765static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
766
767static inline bool __is_real(kuid_t uid, struct cred *cred)
768{ return uid_eq(cred->uid, uid); }
769
770static inline bool __is_eff(kuid_t uid, struct cred *cred)
771{ return uid_eq(cred->euid, uid); }
772
773static inline bool __is_suid(kuid_t uid, struct cred *cred)
774{ return !__is_real(uid, cred) && __is_eff(uid, cred); }
775
776
777
778
779
780
781
782
783
784
785
786
787
788static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
789 bool *effective, kuid_t root_uid)
790{
791 const struct cred *old = current_cred();
792 struct cred *new = bprm->cred;
793
794 if (!root_privileged())
795 return;
796
797
798
799
800
801 if (has_fcap && __is_suid(root_uid, new)) {
802 warn_setuid_and_fcaps_mixed(bprm->filename);
803 return;
804 }
805
806
807
808
809
810 if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
811
812 new->cap_permitted = cap_combine(old->cap_bset,
813 old->cap_inheritable);
814 }
815
816
817
818 if (__is_eff(root_uid, new))
819 *effective = true;
820}
821
/*
 * Predicates over the capability sets stored in a struct cred.
 *
 * __cap_gained(field, target, source): @target's cap_<field> set has a bit
 *	that @source's cap_<field> set lacks.
 * __cap_grew(target, source, cred): within @cred, cap_<target> has a bit
 *	that cap_<source> lacks.
 * __cap_full(field, cred): @cred's cap_<field> set contains CAP_FULL_SET.
 *
 * Pointer arguments and the whole replacement lists are parenthesized so
 * that the macros expand safely whatever expression surrounds them or is
 * passed in.
 */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
828
829static inline bool __is_setuid(struct cred *new, const struct cred *old)
830{ return !uid_eq(new->euid, old->uid); }
831
832static inline bool __is_setgid(struct cred *new, const struct cred *old)
833{ return !gid_eq(new->egid, old->gid); }
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
853 kuid_t root, bool has_fcap)
854{
855 bool ret = false;
856
857 if ((__cap_grew(effective, ambient, new) &&
858 !(__cap_full(effective, new) &&
859 (__is_eff(root, new) || __is_real(root, new)) &&
860 root_privileged())) ||
861 (root_privileged() &&
862 __is_suid(root, new) &&
863 !__cap_full(effective, new)) ||
864 (!__is_setuid(new, old) &&
865 ((has_fcap &&
866 __cap_gained(permitted, new, old)) ||
867 __cap_gained(ambient, new, old))))
868
869 ret = true;
870
871 return ret;
872}
873
874
875
876
877
878
879
880
881
882
883
884
885int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file)
886{
887
888 const struct cred *old = current_cred();
889 struct cred *new = bprm->cred;
890 bool effective = false, has_fcap = false, is_setid;
891 int ret;
892 kuid_t root_uid;
893
894 if (WARN_ON(!cap_ambient_invariant_ok(old)))
895 return -EPERM;
896
897 ret = get_file_caps(bprm, file, &effective, &has_fcap);
898 if (ret < 0)
899 return ret;
900
901 root_uid = make_kuid(new->user_ns, 0);
902
903 handle_privileged_root(bprm, has_fcap, &effective, root_uid);
904
905
906 if (__cap_gained(permitted, new, old))
907 bprm->per_clear |= PER_CLEAR_ON_SETID;
908
909
910
911
912
913
914 is_setid = __is_setuid(new, old) || __is_setgid(new, old);
915
916 if ((is_setid || __cap_gained(permitted, new, old)) &&
917 ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
918 !ptracer_capable(current, new->user_ns))) {
919
920 if (!ns_capable(new->user_ns, CAP_SETUID) ||
921 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
922 new->euid = new->uid;
923 new->egid = new->gid;
924 }
925 new->cap_permitted = cap_intersect(new->cap_permitted,
926 old->cap_permitted);
927 }
928
929 new->suid = new->fsuid = new->euid;
930 new->sgid = new->fsgid = new->egid;
931
932
933 if (has_fcap || is_setid)
934 cap_clear(new->cap_ambient);
935
936
937
938
939
940 new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
941
942
943
944
945
946 if (effective)
947 new->cap_effective = new->cap_permitted;
948 else
949 new->cap_effective = new->cap_ambient;
950
951 if (WARN_ON(!cap_ambient_invariant_ok(new)))
952 return -EPERM;
953
954 if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
955 ret = audit_log_bprm_fcaps(bprm, new, old);
956 if (ret < 0)
957 return ret;
958 }
959
960 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
961
962 if (WARN_ON(!cap_ambient_invariant_ok(new)))
963 return -EPERM;
964
965
966 if (is_setid ||
967 (!__is_real(root_uid, new) &&
968 (effective ||
969 __cap_grew(permitted, ambient, new))))
970 bprm->secureexec = 1;
971
972 return 0;
973}
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989int cap_inode_setxattr(struct dentry *dentry, const char *name,
990 const void *value, size_t size, int flags)
991{
992 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
993
994
995 if (strncmp(name, XATTR_SECURITY_PREFIX,
996 XATTR_SECURITY_PREFIX_LEN) != 0)
997 return 0;
998
999
1000
1001
1002
1003 if (strcmp(name, XATTR_NAME_CAPS) == 0)
1004 return 0;
1005
1006 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1007 return -EPERM;
1008 return 0;
1009}
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030int cap_inode_removexattr(struct mnt_idmap *idmap,
1031 struct dentry *dentry, const char *name)
1032{
1033 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1034
1035
1036 if (strncmp(name, XATTR_SECURITY_PREFIX,
1037 XATTR_SECURITY_PREFIX_LEN) != 0)
1038 return 0;
1039
1040 if (strcmp(name, XATTR_NAME_CAPS) == 0) {
1041
1042 struct inode *inode = d_backing_inode(dentry);
1043 if (!inode)
1044 return -EINVAL;
1045 if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
1046 return -EPERM;
1047 return 0;
1048 }
1049
1050 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1051 return -EPERM;
1052 return 0;
1053}
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
1085{
1086 kuid_t root_uid = make_kuid(old->user_ns, 0);
1087
1088 if ((uid_eq(old->uid, root_uid) ||
1089 uid_eq(old->euid, root_uid) ||
1090 uid_eq(old->suid, root_uid)) &&
1091 (!uid_eq(new->uid, root_uid) &&
1092 !uid_eq(new->euid, root_uid) &&
1093 !uid_eq(new->suid, root_uid))) {
1094 if (!issecure(SECURE_KEEP_CAPS)) {
1095 cap_clear(new->cap_permitted);
1096 cap_clear(new->cap_effective);
1097 }
1098
1099
1100
1101
1102
1103
1104 cap_clear(new->cap_ambient);
1105 }
1106 if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1107 cap_clear(new->cap_effective);
1108 if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1109 new->cap_effective = new->cap_permitted;
1110}
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
1124{
1125 switch (flags) {
1126 case LSM_SETID_RE:
1127 case LSM_SETID_ID:
1128 case LSM_SETID_RES:
1129
1130
1131 if (!issecure(SECURE_NO_SETUID_FIXUP))
1132 cap_emulate_setxuid(new, old);
1133 break;
1134
1135 case LSM_SETID_FS:
1136
1137
1138
1139
1140
1141
1142 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
1143 kuid_t root_uid = make_kuid(old->user_ns, 0);
1144 if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1145 new->cap_effective =
1146 cap_drop_fs_set(new->cap_effective);
1147
1148 if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1149 new->cap_effective =
1150 cap_raise_fs_set(new->cap_effective,
1151 new->cap_permitted);
1152 }
1153 break;
1154
1155 default:
1156 return -EINVAL;
1157 }
1158
1159 return 0;
1160}
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172static int cap_safe_nice(struct task_struct *p)
1173{
1174 int is_subset, ret = 0;
1175
1176 rcu_read_lock();
1177 is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1178 current_cred()->cap_permitted);
1179 if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1180 ret = -EPERM;
1181 rcu_read_unlock();
1182
1183 return ret;
1184}
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
/**
 * cap_task_setscheduler - Check whether current may reschedule a task
 * @p: Task whose scheduling parameters would change
 *
 * Return: result of cap_safe_nice() for @p (0 or -EPERM).
 */
int cap_task_setscheduler(struct task_struct *p)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
/**
 * cap_task_setioprio - Check whether current may set a task's I/O priority
 * @p: Task whose I/O priority would change
 * @ioprio: Requested priority; not inspected here
 *
 * Return: result of cap_safe_nice() for @p (0 or -EPERM).
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
/**
 * cap_task_setnice - Check whether current may set a task's nice value
 * @p: Task whose nice value would change
 * @nice: Requested nice value; not inspected here
 *
 * Return: result of cap_safe_nice() for @p (0 or -EPERM).
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	int rc = cap_safe_nice(p);

	return rc;
}
1229
1230
1231
1232
1233
1234static int cap_prctl_drop(unsigned long cap)
1235{
1236 struct cred *new;
1237
1238 if (!ns_capable(current_user_ns(), CAP_SETPCAP))
1239 return -EPERM;
1240 if (!cap_valid(cap))
1241 return -EINVAL;
1242
1243 new = prepare_creds();
1244 if (!new)
1245 return -ENOMEM;
1246 cap_lower(new->cap_bset, cap);
1247 return commit_creds(new);
1248}
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1266 unsigned long arg4, unsigned long arg5)
1267{
1268 const struct cred *old = current_cred();
1269 struct cred *new;
1270
1271 switch (option) {
1272 case PR_CAPBSET_READ:
1273 if (!cap_valid(arg2))
1274 return -EINVAL;
1275 return !!cap_raised(old->cap_bset, arg2);
1276
1277 case PR_CAPBSET_DROP:
1278 return cap_prctl_drop(arg2);
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299 case PR_SET_SECUREBITS:
1300 if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
1301 & (old->securebits ^ arg2))
1302 || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))
1303 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))
1304 || (cap_capable(current_cred(),
1305 current_cred()->user_ns,
1306 CAP_SETPCAP,
1307 CAP_OPT_NONE) != 0)
1308
1309
1310
1311
1312
1313
1314
1315 )
1316
1317 return -EPERM;
1318
1319 new = prepare_creds();
1320 if (!new)
1321 return -ENOMEM;
1322 new->securebits = arg2;
1323 return commit_creds(new);
1324
1325 case PR_GET_SECUREBITS:
1326 return old->securebits;
1327
1328 case PR_GET_KEEPCAPS:
1329 return !!issecure(SECURE_KEEP_CAPS);
1330
1331 case PR_SET_KEEPCAPS:
1332 if (arg2 > 1)
1333 return -EINVAL;
1334 if (issecure(SECURE_KEEP_CAPS_LOCKED))
1335 return -EPERM;
1336
1337 new = prepare_creds();
1338 if (!new)
1339 return -ENOMEM;
1340 if (arg2)
1341 new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1342 else
1343 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1344 return commit_creds(new);
1345
1346 case PR_CAP_AMBIENT:
1347 if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1348 if (arg3 | arg4 | arg5)
1349 return -EINVAL;
1350
1351 new = prepare_creds();
1352 if (!new)
1353 return -ENOMEM;
1354 cap_clear(new->cap_ambient);
1355 return commit_creds(new);
1356 }
1357
1358 if (((!cap_valid(arg3)) | arg4 | arg5))
1359 return -EINVAL;
1360
1361 if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1362 return !!cap_raised(current_cred()->cap_ambient, arg3);
1363 } else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1364 arg2 != PR_CAP_AMBIENT_LOWER) {
1365 return -EINVAL;
1366 } else {
1367 if (arg2 == PR_CAP_AMBIENT_RAISE &&
1368 (!cap_raised(current_cred()->cap_permitted, arg3) ||
1369 !cap_raised(current_cred()->cap_inheritable,
1370 arg3) ||
1371 issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1372 return -EPERM;
1373
1374 new = prepare_creds();
1375 if (!new)
1376 return -ENOMEM;
1377 if (arg2 == PR_CAP_AMBIENT_RAISE)
1378 cap_raise(new->cap_ambient, arg3);
1379 else
1380 cap_lower(new->cap_ambient, arg3);
1381 return commit_creds(new);
1382 }
1383
1384 default:
1385
1386 return -ENOSYS;
1387 }
1388}
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1401{
1402 int cap_sys_admin = 0;
1403
1404 if (cap_capable(current_cred(), &init_user_ns,
1405 CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
1406 cap_sys_admin = 1;
1407
1408 return cap_sys_admin;
1409}
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421int cap_mmap_addr(unsigned long addr)
1422{
1423 int ret = 0;
1424
1425 if (addr < dac_mmap_min_addr) {
1426 ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1427 CAP_OPT_NONE);
1428
1429 if (ret == 0)
1430 current->flags |= PF_SUPERPRIV;
1431 }
1432 return ret;
1433}
1434
/**
 * cap_mmap_file - File-mapping permission check
 * @file: File being mapped; not inspected here
 * @reqprot: Protection requested by the caller; not inspected here
 * @prot: Protection that will be applied; not inspected here
 * @flags: Mapping flags; not inspected here
 *
 * Return: always 0; this function imposes no restriction.
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	return 0;
}
1440
1441#ifdef CONFIG_SECURITY
1442
1443static struct security_hook_list capability_hooks[] __ro_after_init = {
1444 LSM_HOOK_INIT(capable, cap_capable),
1445 LSM_HOOK_INIT(settime, cap_settime),
1446 LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1447 LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1448 LSM_HOOK_INIT(capget, cap_capget),
1449 LSM_HOOK_INIT(capset, cap_capset),
1450 LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
1451 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1452 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
1453 LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1454 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1455 LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1456 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1457 LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1458 LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1459 LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1460 LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1461 LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1462};
1463
1464static int __init capability_init(void)
1465{
1466 security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
1467 "capability");
1468 return 0;
1469}
1470
1471DEFINE_LSM(capability) = {
1472 .name = "capability",
1473 .order = LSM_ORDER_FIRST,
1474 .init = capability_init,
1475};
1476
1477#endif
1478