1
2
3
4
5#include <linux/capability.h>
6#include <linux/audit.h>
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <linux/lsm_hooks.h>
10#include <linux/file.h>
11#include <linux/mm.h>
12#include <linux/mman.h>
13#include <linux/pagemap.h>
14#include <linux/swap.h>
15#include <linux/skbuff.h>
16#include <linux/netlink.h>
17#include <linux/ptrace.h>
18#include <linux/xattr.h>
19#include <linux/hugetlb.h>
20#include <linux/mount.h>
21#include <linux/sched.h>
22#include <linux/prctl.h>
23#include <linux/securebits.h>
24#include <linux/user_namespace.h>
25#include <linux/binfmts.h>
26#include <linux/personality.h>
27
28
29
30
31
32
33
34
35
36
37
38
39static void warn_setuid_and_fcaps_mixed(const char *fname)
40{
41 static int warned;
42 if (!warned) {
43 printk(KERN_INFO "warning: `%s' has both setuid-root and"
44 " effective capabilities. Therefore not raising all"
45 " capabilities.\n", fname);
46 warned = 1;
47 }
48}
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
66 int cap, unsigned int opts)
67{
68 struct user_namespace *ns = targ_ns;
69
70
71
72
73
74 for (;;) {
75
76 if (ns == cred->user_ns)
77 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
78
79
80
81
82
83 if (ns->level <= cred->user_ns->level)
84 return -EPERM;
85
86
87
88
89
90 if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
91 return 0;
92
93
94
95
96
97 ns = ns->parent;
98 }
99
100
101}
102
103
104
105
106
107
108
109
110
111int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
112{
113 if (!capable(CAP_SYS_TIME))
114 return -EPERM;
115 return 0;
116}
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
134{
135 int ret = 0;
136 const struct cred *cred, *child_cred;
137 const kernel_cap_t *caller_caps;
138
139 rcu_read_lock();
140 cred = current_cred();
141 child_cred = __task_cred(child);
142 if (mode & PTRACE_MODE_FSCREDS)
143 caller_caps = &cred->cap_effective;
144 else
145 caller_caps = &cred->cap_permitted;
146 if (cred->user_ns == child_cred->user_ns &&
147 cap_issubset(child_cred->cap_permitted, *caller_caps))
148 goto out;
149 if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
150 goto out;
151 ret = -EPERM;
152out:
153 rcu_read_unlock();
154 return ret;
155}
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170int cap_ptrace_traceme(struct task_struct *parent)
171{
172 int ret = 0;
173 const struct cred *cred, *child_cred;
174
175 rcu_read_lock();
176 cred = __task_cred(parent);
177 child_cred = current_cred();
178 if (cred->user_ns == child_cred->user_ns &&
179 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
180 goto out;
181 if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
182 goto out;
183 ret = -EPERM;
184out:
185 rcu_read_unlock();
186 return ret;
187}
188
189
190
191
192
193
194
195
196
197
198
199int cap_capget(struct task_struct *target, kernel_cap_t *effective,
200 kernel_cap_t *inheritable, kernel_cap_t *permitted)
201{
202 const struct cred *cred;
203
204
205 rcu_read_lock();
206 cred = __task_cred(target);
207 *effective = cred->cap_effective;
208 *inheritable = cred->cap_inheritable;
209 *permitted = cred->cap_permitted;
210 rcu_read_unlock();
211 return 0;
212}
213
214
215
216
217
218static inline int cap_inh_is_capped(void)
219{
220
221
222
223 if (cap_capable(current_cred(), current_cred()->user_ns,
224 CAP_SETPCAP, CAP_OPT_NONE) == 0)
225 return 0;
226 return 1;
227}
228
229
230
231
232
233
234
235
236
237
238
239
240
241int cap_capset(struct cred *new,
242 const struct cred *old,
243 const kernel_cap_t *effective,
244 const kernel_cap_t *inheritable,
245 const kernel_cap_t *permitted)
246{
247 if (cap_inh_is_capped() &&
248 !cap_issubset(*inheritable,
249 cap_combine(old->cap_inheritable,
250 old->cap_permitted)))
251
252 return -EPERM;
253
254 if (!cap_issubset(*inheritable,
255 cap_combine(old->cap_inheritable,
256 old->cap_bset)))
257
258 return -EPERM;
259
260
261 if (!cap_issubset(*permitted, old->cap_permitted))
262 return -EPERM;
263
264
265 if (!cap_issubset(*effective, *permitted))
266 return -EPERM;
267
268 new->cap_effective = *effective;
269 new->cap_inheritable = *inheritable;
270 new->cap_permitted = *permitted;
271
272
273
274
275
276 new->cap_ambient = cap_intersect(new->cap_ambient,
277 cap_intersect(*permitted,
278 *inheritable));
279 if (WARN_ON(!cap_ambient_invariant_ok(new)))
280 return -EINVAL;
281 return 0;
282}
283
284
285
286
287
288
289
290
291
292
293
294
295int cap_inode_need_killpriv(struct dentry *dentry)
296{
297 struct inode *inode = d_backing_inode(dentry);
298 int error;
299
300 error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
301 return error > 0;
302}
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry)
321{
322 int error;
323
324 error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS);
325 if (error == -EOPNOTSUPP)
326 error = 0;
327 return error;
328}
329
330static bool rootid_owns_currentns(kuid_t kroot)
331{
332 struct user_namespace *ns;
333
334 if (!uid_valid(kroot))
335 return false;
336
337 for (ns = current_user_ns(); ; ns = ns->parent) {
338 if (from_kuid(ns, kroot) == 0)
339 return true;
340 if (ns == &init_user_ns)
341 break;
342 }
343
344 return false;
345}
346
347static __u32 sansflags(__u32 m)
348{
349 return m & ~VFS_CAP_FLAGS_EFFECTIVE;
350}
351
352static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
353{
354 if (size != XATTR_CAPS_SZ_2)
355 return false;
356 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
357}
358
359static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
360{
361 if (size != XATTR_CAPS_SZ_3)
362 return false;
363 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
364}
365
366
367
368
369
370
371
372
373
374
375
376
377int cap_inode_getsecurity(struct user_namespace *mnt_userns,
378 struct inode *inode, const char *name, void **buffer,
379 bool alloc)
380{
381 int size, ret;
382 kuid_t kroot;
383 u32 nsmagic, magic;
384 uid_t root, mappedroot;
385 char *tmpbuf = NULL;
386 struct vfs_cap_data *cap;
387 struct vfs_ns_cap_data *nscap = NULL;
388 struct dentry *dentry;
389 struct user_namespace *fs_ns;
390
391 if (strcmp(name, "capability") != 0)
392 return -EOPNOTSUPP;
393
394 dentry = d_find_any_alias(inode);
395 if (!dentry)
396 return -EINVAL;
397
398 size = sizeof(struct vfs_ns_cap_data);
399 ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS,
400 &tmpbuf, size, GFP_NOFS);
401 dput(dentry);
402
403 if (ret < 0 || !tmpbuf)
404 return ret;
405
406 fs_ns = inode->i_sb->s_user_ns;
407 cap = (struct vfs_cap_data *) tmpbuf;
408 if (is_v2header((size_t) ret, cap)) {
409 root = 0;
410 } else if (is_v3header((size_t) ret, cap)) {
411 nscap = (struct vfs_ns_cap_data *) tmpbuf;
412 root = le32_to_cpu(nscap->rootid);
413 } else {
414 size = -EINVAL;
415 goto out_free;
416 }
417
418 kroot = make_kuid(fs_ns, root);
419
420
421 kroot = kuid_into_mnt(mnt_userns, kroot);
422
423
424
425 mappedroot = from_kuid(current_user_ns(), kroot);
426 if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
427 size = sizeof(struct vfs_ns_cap_data);
428 if (alloc) {
429 if (!nscap) {
430
431 nscap = kzalloc(size, GFP_ATOMIC);
432 if (!nscap) {
433 size = -ENOMEM;
434 goto out_free;
435 }
436 nsmagic = VFS_CAP_REVISION_3;
437 magic = le32_to_cpu(cap->magic_etc);
438 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
439 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
440 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
441 nscap->magic_etc = cpu_to_le32(nsmagic);
442 } else {
443
444 tmpbuf = NULL;
445 }
446 nscap->rootid = cpu_to_le32(mappedroot);
447 *buffer = nscap;
448 }
449 goto out_free;
450 }
451
452 if (!rootid_owns_currentns(kroot)) {
453 size = -EOVERFLOW;
454 goto out_free;
455 }
456
457
458 size = sizeof(struct vfs_cap_data);
459 if (alloc) {
460 if (nscap) {
461
462 cap = kzalloc(size, GFP_ATOMIC);
463 if (!cap) {
464 size = -ENOMEM;
465 goto out_free;
466 }
467 magic = VFS_CAP_REVISION_2;
468 nsmagic = le32_to_cpu(nscap->magic_etc);
469 if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
470 magic |= VFS_CAP_FLAGS_EFFECTIVE;
471 memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
472 cap->magic_etc = cpu_to_le32(magic);
473 } else {
474
475 tmpbuf = NULL;
476 }
477 *buffer = cap;
478 }
479out_free:
480 kfree(tmpbuf);
481 return size;
482}
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498static kuid_t rootid_from_xattr(const void *value, size_t size,
499 struct user_namespace *task_ns,
500 struct user_namespace *mnt_userns)
501{
502 const struct vfs_ns_cap_data *nscap = value;
503 kuid_t rootkid;
504 uid_t rootid = 0;
505
506 if (size == XATTR_CAPS_SZ_3)
507 rootid = le32_to_cpu(nscap->rootid);
508
509 rootkid = make_kuid(task_ns, rootid);
510 return kuid_from_mnt(mnt_userns, rootkid);
511}
512
513static bool validheader(size_t size, const struct vfs_cap_data *cap)
514{
515 return is_v2header(size, cap) || is_v3header(size, cap);
516}
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
538 const void **ivalue, size_t size)
539{
540 struct vfs_ns_cap_data *nscap;
541 uid_t nsrootid;
542 const struct vfs_cap_data *cap = *ivalue;
543 __u32 magic, nsmagic;
544 struct inode *inode = d_backing_inode(dentry);
545 struct user_namespace *task_ns = current_user_ns(),
546 *fs_ns = inode->i_sb->s_user_ns;
547 kuid_t rootid;
548 size_t newsize;
549
550 if (!*ivalue)
551 return -EINVAL;
552 if (!validheader(size, cap))
553 return -EINVAL;
554 if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
555 return -EPERM;
556 if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns))
557 if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
558
559 return size;
560
561 rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns);
562 if (!uid_valid(rootid))
563 return -EINVAL;
564
565 nsrootid = from_kuid(fs_ns, rootid);
566 if (nsrootid == -1)
567 return -EINVAL;
568
569 newsize = sizeof(struct vfs_ns_cap_data);
570 nscap = kmalloc(newsize, GFP_ATOMIC);
571 if (!nscap)
572 return -ENOMEM;
573 nscap->rootid = cpu_to_le32(nsrootid);
574 nsmagic = VFS_CAP_REVISION_3;
575 magic = le32_to_cpu(cap->magic_etc);
576 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
577 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
578 nscap->magic_etc = cpu_to_le32(nsmagic);
579 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
580
581 *ivalue = nscap;
582 return newsize;
583}
584
585
586
587
588
589static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
590 struct linux_binprm *bprm,
591 bool *effective,
592 bool *has_fcap)
593{
594 struct cred *new = bprm->cred;
595 unsigned i;
596 int ret = 0;
597
598 if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
599 *effective = true;
600
601 if (caps->magic_etc & VFS_CAP_REVISION_MASK)
602 *has_fcap = true;
603
604 CAP_FOR_EACH_U32(i) {
605 __u32 permitted = caps->permitted.cap[i];
606 __u32 inheritable = caps->inheritable.cap[i];
607
608
609
610
611
612 new->cap_permitted.cap[i] =
613 (new->cap_bset.cap[i] & permitted) |
614 (new->cap_inheritable.cap[i] & inheritable);
615
616 if (permitted & ~new->cap_permitted.cap[i])
617
618 ret = -EPERM;
619 }
620
621
622
623
624
625
626 return *effective ? ret : 0;
627}
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
645 const struct dentry *dentry,
646 struct cpu_vfs_cap_data *cpu_caps)
647{
648 struct inode *inode = d_backing_inode(dentry);
649 __u32 magic_etc;
650 unsigned tocopy, i;
651 int size;
652 struct vfs_ns_cap_data data, *nscaps = &data;
653 struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
654 kuid_t rootkuid;
655 struct user_namespace *fs_ns;
656
657 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
658
659 if (!inode)
660 return -ENODATA;
661
662 fs_ns = inode->i_sb->s_user_ns;
663 size = __vfs_getxattr((struct dentry *)dentry, inode,
664 XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
665 if (size == -ENODATA || size == -EOPNOTSUPP)
666
667 return -ENODATA;
668
669 if (size < 0)
670 return size;
671
672 if (size < sizeof(magic_etc))
673 return -EINVAL;
674
675 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
676
677 rootkuid = make_kuid(fs_ns, 0);
678 switch (magic_etc & VFS_CAP_REVISION_MASK) {
679 case VFS_CAP_REVISION_1:
680 if (size != XATTR_CAPS_SZ_1)
681 return -EINVAL;
682 tocopy = VFS_CAP_U32_1;
683 break;
684 case VFS_CAP_REVISION_2:
685 if (size != XATTR_CAPS_SZ_2)
686 return -EINVAL;
687 tocopy = VFS_CAP_U32_2;
688 break;
689 case VFS_CAP_REVISION_3:
690 if (size != XATTR_CAPS_SZ_3)
691 return -EINVAL;
692 tocopy = VFS_CAP_U32_3;
693 rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
694 break;
695
696 default:
697 return -EINVAL;
698 }
699
700
701
702 rootkuid = kuid_into_mnt(mnt_userns, rootkuid);
703 if (!rootid_owns_currentns(rootkuid))
704 return -ENODATA;
705
706 CAP_FOR_EACH_U32(i) {
707 if (i >= tocopy)
708 break;
709 cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
710 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
711 }
712
713 cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
714 cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
715
716 cpu_caps->rootid = rootkuid;
717
718 return 0;
719}
720
721
722
723
724
725
726static int get_file_caps(struct linux_binprm *bprm, struct file *file,
727 bool *effective, bool *has_fcap)
728{
729 int rc = 0;
730 struct cpu_vfs_cap_data vcaps;
731
732 cap_clear(bprm->cred->cap_permitted);
733
734 if (!file_caps_enabled)
735 return 0;
736
737 if (!mnt_may_suid(file->f_path.mnt))
738 return 0;
739
740
741
742
743
744
745 if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
746 return 0;
747
748 rc = get_vfs_caps_from_disk(file_mnt_user_ns(file),
749 file->f_path.dentry, &vcaps);
750 if (rc < 0) {
751 if (rc == -EINVAL)
752 printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
753 bprm->filename);
754 else if (rc == -ENODATA)
755 rc = 0;
756 goto out;
757 }
758
759 rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
760
761out:
762 if (rc)
763 cap_clear(bprm->cred->cap_permitted);
764
765 return rc;
766}
767
768static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
769
770static inline bool __is_real(kuid_t uid, struct cred *cred)
771{ return uid_eq(cred->uid, uid); }
772
773static inline bool __is_eff(kuid_t uid, struct cred *cred)
774{ return uid_eq(cred->euid, uid); }
775
776static inline bool __is_suid(kuid_t uid, struct cred *cred)
777{ return !__is_real(uid, cred) && __is_eff(uid, cred); }
778
779
780
781
782
783
784
785
786
787
788
789
790
791static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
792 bool *effective, kuid_t root_uid)
793{
794 const struct cred *old = current_cred();
795 struct cred *new = bprm->cred;
796
797 if (!root_privileged())
798 return;
799
800
801
802
803
804 if (has_fcap && __is_suid(root_uid, new)) {
805 warn_setuid_and_fcaps_mixed(bprm->filename);
806 return;
807 }
808
809
810
811
812
813 if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
814
815 new->cap_permitted = cap_combine(old->cap_bset,
816 old->cap_inheritable);
817 }
818
819
820
821 if (__is_eff(root_uid, new))
822 *effective = true;
823}
824
/*
 * Capability-set comparison helpers.  @field/@target/@source in the first
 * two positions of __cap_grew() and in @field elsewhere are set names
 * without the "cap_" prefix (e.g. permitted, ambient).
 *
 * __cap_gained(field, target, source): the @field set of cred @target has
 *	bits that the same set of cred @source lacks.
 * __cap_grew(target, source, cred): within one @cred, set @target has bits
 *	that set @source lacks.
 * __cap_full(field, cred): the @field set of @cred contains CAP_FULL_SET.
 */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
831
832static inline bool __is_setuid(struct cred *new, const struct cred *old)
833{ return !uid_eq(new->euid, old->uid); }
834
835static inline bool __is_setgid(struct cred *new, const struct cred *old)
836{ return !gid_eq(new->egid, old->gid); }
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
856 kuid_t root, bool has_fcap)
857{
858 bool ret = false;
859
860 if ((__cap_grew(effective, ambient, new) &&
861 !(__cap_full(effective, new) &&
862 (__is_eff(root, new) || __is_real(root, new)) &&
863 root_privileged())) ||
864 (root_privileged() &&
865 __is_suid(root, new) &&
866 !__cap_full(effective, new)) ||
867 (!__is_setuid(new, old) &&
868 ((has_fcap &&
869 __cap_gained(permitted, new, old)) ||
870 __cap_gained(ambient, new, old))))
871
872 ret = true;
873
874 return ret;
875}
876
877
878
879
880
881
882
883
884
885
886
887
888int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file)
889{
890
891 const struct cred *old = current_cred();
892 struct cred *new = bprm->cred;
893 bool effective = false, has_fcap = false, is_setid;
894 int ret;
895 kuid_t root_uid;
896
897 if (WARN_ON(!cap_ambient_invariant_ok(old)))
898 return -EPERM;
899
900 ret = get_file_caps(bprm, file, &effective, &has_fcap);
901 if (ret < 0)
902 return ret;
903
904 root_uid = make_kuid(new->user_ns, 0);
905
906 handle_privileged_root(bprm, has_fcap, &effective, root_uid);
907
908
909 if (__cap_gained(permitted, new, old))
910 bprm->per_clear |= PER_CLEAR_ON_SETID;
911
912
913
914
915
916
917 is_setid = __is_setuid(new, old) || __is_setgid(new, old);
918
919 if ((is_setid || __cap_gained(permitted, new, old)) &&
920 ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
921 !ptracer_capable(current, new->user_ns))) {
922
923 if (!ns_capable(new->user_ns, CAP_SETUID) ||
924 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
925 new->euid = new->uid;
926 new->egid = new->gid;
927 }
928 new->cap_permitted = cap_intersect(new->cap_permitted,
929 old->cap_permitted);
930 }
931
932 new->suid = new->fsuid = new->euid;
933 new->sgid = new->fsgid = new->egid;
934
935
936 if (has_fcap || is_setid)
937 cap_clear(new->cap_ambient);
938
939
940
941
942
943 new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
944
945
946
947
948
949 if (effective)
950 new->cap_effective = new->cap_permitted;
951 else
952 new->cap_effective = new->cap_ambient;
953
954 if (WARN_ON(!cap_ambient_invariant_ok(new)))
955 return -EPERM;
956
957 if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
958 ret = audit_log_bprm_fcaps(bprm, new, old);
959 if (ret < 0)
960 return ret;
961 }
962
963 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
964
965 if (WARN_ON(!cap_ambient_invariant_ok(new)))
966 return -EPERM;
967
968
969 if (is_setid ||
970 (!__is_real(root_uid, new) &&
971 (effective ||
972 __cap_grew(permitted, ambient, new))))
973 bprm->secureexec = 1;
974
975 return 0;
976}
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992int cap_inode_setxattr(struct dentry *dentry, const char *name,
993 const void *value, size_t size, int flags)
994{
995 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
996
997
998 if (strncmp(name, XATTR_SECURITY_PREFIX,
999 XATTR_SECURITY_PREFIX_LEN) != 0)
1000 return 0;
1001
1002
1003
1004
1005
1006 if (strcmp(name, XATTR_NAME_CAPS) == 0)
1007 return 0;
1008
1009 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1010 return -EPERM;
1011 return 0;
1012}
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033int cap_inode_removexattr(struct user_namespace *mnt_userns,
1034 struct dentry *dentry, const char *name)
1035{
1036 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1037
1038
1039 if (strncmp(name, XATTR_SECURITY_PREFIX,
1040 XATTR_SECURITY_PREFIX_LEN) != 0)
1041 return 0;
1042
1043 if (strcmp(name, XATTR_NAME_CAPS) == 0) {
1044
1045 struct inode *inode = d_backing_inode(dentry);
1046 if (!inode)
1047 return -EINVAL;
1048 if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
1049 return -EPERM;
1050 return 0;
1051 }
1052
1053 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1054 return -EPERM;
1055 return 0;
1056}
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
1088{
1089 kuid_t root_uid = make_kuid(old->user_ns, 0);
1090
1091 if ((uid_eq(old->uid, root_uid) ||
1092 uid_eq(old->euid, root_uid) ||
1093 uid_eq(old->suid, root_uid)) &&
1094 (!uid_eq(new->uid, root_uid) &&
1095 !uid_eq(new->euid, root_uid) &&
1096 !uid_eq(new->suid, root_uid))) {
1097 if (!issecure(SECURE_KEEP_CAPS)) {
1098 cap_clear(new->cap_permitted);
1099 cap_clear(new->cap_effective);
1100 }
1101
1102
1103
1104
1105
1106
1107 cap_clear(new->cap_ambient);
1108 }
1109 if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1110 cap_clear(new->cap_effective);
1111 if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1112 new->cap_effective = new->cap_permitted;
1113}
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
1127{
1128 switch (flags) {
1129 case LSM_SETID_RE:
1130 case LSM_SETID_ID:
1131 case LSM_SETID_RES:
1132
1133
1134 if (!issecure(SECURE_NO_SETUID_FIXUP))
1135 cap_emulate_setxuid(new, old);
1136 break;
1137
1138 case LSM_SETID_FS:
1139
1140
1141
1142
1143
1144
1145 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
1146 kuid_t root_uid = make_kuid(old->user_ns, 0);
1147 if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1148 new->cap_effective =
1149 cap_drop_fs_set(new->cap_effective);
1150
1151 if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1152 new->cap_effective =
1153 cap_raise_fs_set(new->cap_effective,
1154 new->cap_permitted);
1155 }
1156 break;
1157
1158 default:
1159 return -EINVAL;
1160 }
1161
1162 return 0;
1163}
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175static int cap_safe_nice(struct task_struct *p)
1176{
1177 int is_subset, ret = 0;
1178
1179 rcu_read_lock();
1180 is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1181 current_cred()->cap_permitted);
1182 if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1183 ret = -EPERM;
1184 rcu_read_unlock();
1185
1186 return ret;
1187}
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
/**
 * cap_task_setscheduler - Check whether a task's scheduler may be changed
 * @p: task to be affected
 *
 * Defers entirely to cap_safe_nice().
 *
 * Return: 0 if permitted, -EPERM otherwise.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	return cap_safe_nice(p);
}
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
/**
 * cap_task_setioprio - Check whether a task's I/O priority may be changed
 * @p: task to be affected
 * @ioprio: requested I/O priority (not inspected here)
 *
 * Defers entirely to cap_safe_nice().
 *
 * Return: 0 if permitted, -EPERM otherwise.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	return cap_safe_nice(p);
}
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
/**
 * cap_task_setnice - Check whether a task's nice value may be changed
 * @p: task to be affected
 * @nice: requested nice value (not inspected here)
 *
 * Defers entirely to cap_safe_nice().
 *
 * Return: 0 if permitted, -EPERM otherwise.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	return cap_safe_nice(p);
}
1232
1233
1234
1235
1236
1237static int cap_prctl_drop(unsigned long cap)
1238{
1239 struct cred *new;
1240
1241 if (!ns_capable(current_user_ns(), CAP_SETPCAP))
1242 return -EPERM;
1243 if (!cap_valid(cap))
1244 return -EINVAL;
1245
1246 new = prepare_creds();
1247 if (!new)
1248 return -ENOMEM;
1249 cap_lower(new->cap_bset, cap);
1250 return commit_creds(new);
1251}
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1269 unsigned long arg4, unsigned long arg5)
1270{
1271 const struct cred *old = current_cred();
1272 struct cred *new;
1273
1274 switch (option) {
1275 case PR_CAPBSET_READ:
1276 if (!cap_valid(arg2))
1277 return -EINVAL;
1278 return !!cap_raised(old->cap_bset, arg2);
1279
1280 case PR_CAPBSET_DROP:
1281 return cap_prctl_drop(arg2);
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302 case PR_SET_SECUREBITS:
1303 if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
1304 & (old->securebits ^ arg2))
1305 || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))
1306 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))
1307 || (cap_capable(current_cred(),
1308 current_cred()->user_ns,
1309 CAP_SETPCAP,
1310 CAP_OPT_NONE) != 0)
1311
1312
1313
1314
1315
1316
1317
1318 )
1319
1320 return -EPERM;
1321
1322 new = prepare_creds();
1323 if (!new)
1324 return -ENOMEM;
1325 new->securebits = arg2;
1326 return commit_creds(new);
1327
1328 case PR_GET_SECUREBITS:
1329 return old->securebits;
1330
1331 case PR_GET_KEEPCAPS:
1332 return !!issecure(SECURE_KEEP_CAPS);
1333
1334 case PR_SET_KEEPCAPS:
1335 if (arg2 > 1)
1336 return -EINVAL;
1337 if (issecure(SECURE_KEEP_CAPS_LOCKED))
1338 return -EPERM;
1339
1340 new = prepare_creds();
1341 if (!new)
1342 return -ENOMEM;
1343 if (arg2)
1344 new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1345 else
1346 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1347 return commit_creds(new);
1348
1349 case PR_CAP_AMBIENT:
1350 if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1351 if (arg3 | arg4 | arg5)
1352 return -EINVAL;
1353
1354 new = prepare_creds();
1355 if (!new)
1356 return -ENOMEM;
1357 cap_clear(new->cap_ambient);
1358 return commit_creds(new);
1359 }
1360
1361 if (((!cap_valid(arg3)) | arg4 | arg5))
1362 return -EINVAL;
1363
1364 if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1365 return !!cap_raised(current_cred()->cap_ambient, arg3);
1366 } else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1367 arg2 != PR_CAP_AMBIENT_LOWER) {
1368 return -EINVAL;
1369 } else {
1370 if (arg2 == PR_CAP_AMBIENT_RAISE &&
1371 (!cap_raised(current_cred()->cap_permitted, arg3) ||
1372 !cap_raised(current_cred()->cap_inheritable,
1373 arg3) ||
1374 issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1375 return -EPERM;
1376
1377 new = prepare_creds();
1378 if (!new)
1379 return -ENOMEM;
1380 if (arg2 == PR_CAP_AMBIENT_RAISE)
1381 cap_raise(new->cap_ambient, arg3);
1382 else
1383 cap_lower(new->cap_ambient, arg3);
1384 return commit_creds(new);
1385 }
1386
1387 default:
1388
1389 return -ENOSYS;
1390 }
1391}
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1404{
1405 int cap_sys_admin = 0;
1406
1407 if (cap_capable(current_cred(), &init_user_ns,
1408 CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
1409 cap_sys_admin = 1;
1410
1411 return cap_sys_admin;
1412}
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424int cap_mmap_addr(unsigned long addr)
1425{
1426 int ret = 0;
1427
1428 if (addr < dac_mmap_min_addr) {
1429 ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1430 CAP_OPT_NONE);
1431
1432 if (ret == 0)
1433 current->flags |= PF_SUPERPRIV;
1434 }
1435 return ret;
1436}
1437
/**
 * cap_mmap_file - File-mapping check; imposes no restriction
 * @file: file being mapped (unused)
 * @reqprot: protection requested by the caller (unused)
 * @prot: protection that will be applied (unused)
 * @flags: mapping flags (unused)
 *
 * Return: always 0.
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	return 0;
}
1443
1444#ifdef CONFIG_SECURITY
1445
1446static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
1447 LSM_HOOK_INIT(capable, cap_capable),
1448 LSM_HOOK_INIT(settime, cap_settime),
1449 LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1450 LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1451 LSM_HOOK_INIT(capget, cap_capget),
1452 LSM_HOOK_INIT(capset, cap_capset),
1453 LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
1454 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1455 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
1456 LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1457 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1458 LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1459 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1460 LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1461 LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1462 LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1463 LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1464 LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1465};
1466
1467static int __init capability_init(void)
1468{
1469 security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
1470 "capability");
1471 return 0;
1472}
1473
1474DEFINE_LSM(capability) = {
1475 .name = "capability",
1476 .order = LSM_ORDER_FIRST,
1477 .init = capability_init,
1478};
1479
1480#endif
1481