1
2
3
4
5#include <linux/capability.h>
6#include <linux/audit.h>
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <linux/lsm_hooks.h>
10#include <linux/file.h>
11#include <linux/mm.h>
12#include <linux/mman.h>
13#include <linux/pagemap.h>
14#include <linux/swap.h>
15#include <linux/skbuff.h>
16#include <linux/netlink.h>
17#include <linux/ptrace.h>
18#include <linux/xattr.h>
19#include <linux/hugetlb.h>
20#include <linux/mount.h>
21#include <linux/sched.h>
22#include <linux/prctl.h>
23#include <linux/securebits.h>
24#include <linux/user_namespace.h>
25#include <linux/binfmts.h>
26#include <linux/personality.h>
27#include <linux/mnt_idmapping.h>
28
29
30
31
32
33
34
35
36
37
38
39
40static void warn_setuid_and_fcaps_mixed(const char *fname)
41{
42 static int warned;
43 if (!warned) {
44 printk(KERN_INFO "warning: `%s' has both setuid-root and"
45 " effective capabilities. Therefore not raising all"
46 " capabilities.\n", fname);
47 warned = 1;
48 }
49}
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
67 int cap, unsigned int opts)
68{
69 struct user_namespace *ns = targ_ns;
70
71
72
73
74
75 for (;;) {
76
77 if (ns == cred->user_ns)
78 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
79
80
81
82
83
84 if (ns->level <= cred->user_ns->level)
85 return -EPERM;
86
87
88
89
90
91 if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
92 return 0;
93
94
95
96
97
98 ns = ns->parent;
99 }
100
101
102}
103
104
105
106
107
108
109
110
111
112int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
113{
114 if (!capable(CAP_SYS_TIME))
115 return -EPERM;
116 return 0;
117}
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
135{
136 int ret = 0;
137 const struct cred *cred, *child_cred;
138 const kernel_cap_t *caller_caps;
139
140 rcu_read_lock();
141 cred = current_cred();
142 child_cred = __task_cred(child);
143 if (mode & PTRACE_MODE_FSCREDS)
144 caller_caps = &cred->cap_effective;
145 else
146 caller_caps = &cred->cap_permitted;
147 if (cred->user_ns == child_cred->user_ns &&
148 cap_issubset(child_cred->cap_permitted, *caller_caps))
149 goto out;
150 if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
151 goto out;
152 ret = -EPERM;
153out:
154 rcu_read_unlock();
155 return ret;
156}
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171int cap_ptrace_traceme(struct task_struct *parent)
172{
173 int ret = 0;
174 const struct cred *cred, *child_cred;
175
176 rcu_read_lock();
177 cred = __task_cred(parent);
178 child_cred = current_cred();
179 if (cred->user_ns == child_cred->user_ns &&
180 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
181 goto out;
182 if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
183 goto out;
184 ret = -EPERM;
185out:
186 rcu_read_unlock();
187 return ret;
188}
189
190
191
192
193
194
195
196
197
198
199
200int cap_capget(struct task_struct *target, kernel_cap_t *effective,
201 kernel_cap_t *inheritable, kernel_cap_t *permitted)
202{
203 const struct cred *cred;
204
205
206 rcu_read_lock();
207 cred = __task_cred(target);
208 *effective = cred->cap_effective;
209 *inheritable = cred->cap_inheritable;
210 *permitted = cred->cap_permitted;
211 rcu_read_unlock();
212 return 0;
213}
214
215
216
217
218
219static inline int cap_inh_is_capped(void)
220{
221
222
223
224 if (cap_capable(current_cred(), current_cred()->user_ns,
225 CAP_SETPCAP, CAP_OPT_NONE) == 0)
226 return 0;
227 return 1;
228}
229
230
231
232
233
234
235
236
237
238
239
240
241
242int cap_capset(struct cred *new,
243 const struct cred *old,
244 const kernel_cap_t *effective,
245 const kernel_cap_t *inheritable,
246 const kernel_cap_t *permitted)
247{
248 if (cap_inh_is_capped() &&
249 !cap_issubset(*inheritable,
250 cap_combine(old->cap_inheritable,
251 old->cap_permitted)))
252
253 return -EPERM;
254
255 if (!cap_issubset(*inheritable,
256 cap_combine(old->cap_inheritable,
257 old->cap_bset)))
258
259 return -EPERM;
260
261
262 if (!cap_issubset(*permitted, old->cap_permitted))
263 return -EPERM;
264
265
266 if (!cap_issubset(*effective, *permitted))
267 return -EPERM;
268
269 new->cap_effective = *effective;
270 new->cap_inheritable = *inheritable;
271 new->cap_permitted = *permitted;
272
273
274
275
276
277 new->cap_ambient = cap_intersect(new->cap_ambient,
278 cap_intersect(*permitted,
279 *inheritable));
280 if (WARN_ON(!cap_ambient_invariant_ok(new)))
281 return -EINVAL;
282 return 0;
283}
284
285
286
287
288
289
290
291
292
293
294
295
296int cap_inode_need_killpriv(struct dentry *dentry)
297{
298 struct inode *inode = d_backing_inode(dentry);
299 int error;
300
301 error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
302 return error > 0;
303}
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry)
322{
323 int error;
324
325 error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS);
326 if (error == -EOPNOTSUPP)
327 error = 0;
328 return error;
329}
330
331static bool rootid_owns_currentns(kuid_t kroot)
332{
333 struct user_namespace *ns;
334
335 if (!uid_valid(kroot))
336 return false;
337
338 for (ns = current_user_ns(); ; ns = ns->parent) {
339 if (from_kuid(ns, kroot) == 0)
340 return true;
341 if (ns == &init_user_ns)
342 break;
343 }
344
345 return false;
346}
347
348static __u32 sansflags(__u32 m)
349{
350 return m & ~VFS_CAP_FLAGS_EFFECTIVE;
351}
352
353static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
354{
355 if (size != XATTR_CAPS_SZ_2)
356 return false;
357 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
358}
359
360static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
361{
362 if (size != XATTR_CAPS_SZ_3)
363 return false;
364 return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
365}
366
367
368
369
370
371
372
373
374
375
376
377
378int cap_inode_getsecurity(struct user_namespace *mnt_userns,
379 struct inode *inode, const char *name, void **buffer,
380 bool alloc)
381{
382 int size, ret;
383 kuid_t kroot;
384 u32 nsmagic, magic;
385 uid_t root, mappedroot;
386 char *tmpbuf = NULL;
387 struct vfs_cap_data *cap;
388 struct vfs_ns_cap_data *nscap = NULL;
389 struct dentry *dentry;
390 struct user_namespace *fs_ns;
391
392 if (strcmp(name, "capability") != 0)
393 return -EOPNOTSUPP;
394
395 dentry = d_find_any_alias(inode);
396 if (!dentry)
397 return -EINVAL;
398
399 size = sizeof(struct vfs_ns_cap_data);
400 ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS,
401 &tmpbuf, size, GFP_NOFS);
402 dput(dentry);
403
404 if (ret < 0 || !tmpbuf)
405 return ret;
406
407 fs_ns = inode->i_sb->s_user_ns;
408 cap = (struct vfs_cap_data *) tmpbuf;
409 if (is_v2header((size_t) ret, cap)) {
410 root = 0;
411 } else if (is_v3header((size_t) ret, cap)) {
412 nscap = (struct vfs_ns_cap_data *) tmpbuf;
413 root = le32_to_cpu(nscap->rootid);
414 } else {
415 size = -EINVAL;
416 goto out_free;
417 }
418
419 kroot = make_kuid(fs_ns, root);
420
421
422 kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot);
423
424
425
426 mappedroot = from_kuid(current_user_ns(), kroot);
427 if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
428 size = sizeof(struct vfs_ns_cap_data);
429 if (alloc) {
430 if (!nscap) {
431
432 nscap = kzalloc(size, GFP_ATOMIC);
433 if (!nscap) {
434 size = -ENOMEM;
435 goto out_free;
436 }
437 nsmagic = VFS_CAP_REVISION_3;
438 magic = le32_to_cpu(cap->magic_etc);
439 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
440 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
441 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
442 nscap->magic_etc = cpu_to_le32(nsmagic);
443 } else {
444
445 tmpbuf = NULL;
446 }
447 nscap->rootid = cpu_to_le32(mappedroot);
448 *buffer = nscap;
449 }
450 goto out_free;
451 }
452
453 if (!rootid_owns_currentns(kroot)) {
454 size = -EOVERFLOW;
455 goto out_free;
456 }
457
458
459 size = sizeof(struct vfs_cap_data);
460 if (alloc) {
461 if (nscap) {
462
463 cap = kzalloc(size, GFP_ATOMIC);
464 if (!cap) {
465 size = -ENOMEM;
466 goto out_free;
467 }
468 magic = VFS_CAP_REVISION_2;
469 nsmagic = le32_to_cpu(nscap->magic_etc);
470 if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
471 magic |= VFS_CAP_FLAGS_EFFECTIVE;
472 memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
473 cap->magic_etc = cpu_to_le32(magic);
474 } else {
475
476 tmpbuf = NULL;
477 }
478 *buffer = cap;
479 }
480out_free:
481 kfree(tmpbuf);
482 return size;
483}
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500static kuid_t rootid_from_xattr(const void *value, size_t size,
501 struct user_namespace *task_ns,
502 struct user_namespace *mnt_userns,
503 struct user_namespace *fs_userns)
504{
505 const struct vfs_ns_cap_data *nscap = value;
506 kuid_t rootkid;
507 uid_t rootid = 0;
508
509 if (size == XATTR_CAPS_SZ_3)
510 rootid = le32_to_cpu(nscap->rootid);
511
512 rootkid = make_kuid(task_ns, rootid);
513 return mapped_kuid_user(mnt_userns, fs_userns, rootkid);
514}
515
516static bool validheader(size_t size, const struct vfs_cap_data *cap)
517{
518 return is_v2header(size, cap) || is_v3header(size, cap);
519}
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
541 const void **ivalue, size_t size)
542{
543 struct vfs_ns_cap_data *nscap;
544 uid_t nsrootid;
545 const struct vfs_cap_data *cap = *ivalue;
546 __u32 magic, nsmagic;
547 struct inode *inode = d_backing_inode(dentry);
548 struct user_namespace *task_ns = current_user_ns(),
549 *fs_ns = inode->i_sb->s_user_ns;
550 kuid_t rootid;
551 size_t newsize;
552
553 if (!*ivalue)
554 return -EINVAL;
555 if (!validheader(size, cap))
556 return -EINVAL;
557 if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
558 return -EPERM;
559 if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
560 if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
561
562 return size;
563
564 rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns);
565 if (!uid_valid(rootid))
566 return -EINVAL;
567
568 nsrootid = from_kuid(fs_ns, rootid);
569 if (nsrootid == -1)
570 return -EINVAL;
571
572 newsize = sizeof(struct vfs_ns_cap_data);
573 nscap = kmalloc(newsize, GFP_ATOMIC);
574 if (!nscap)
575 return -ENOMEM;
576 nscap->rootid = cpu_to_le32(nsrootid);
577 nsmagic = VFS_CAP_REVISION_3;
578 magic = le32_to_cpu(cap->magic_etc);
579 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
580 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
581 nscap->magic_etc = cpu_to_le32(nsmagic);
582 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
583
584 *ivalue = nscap;
585 return newsize;
586}
587
588
589
590
591
592static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
593 struct linux_binprm *bprm,
594 bool *effective,
595 bool *has_fcap)
596{
597 struct cred *new = bprm->cred;
598 unsigned i;
599 int ret = 0;
600
601 if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
602 *effective = true;
603
604 if (caps->magic_etc & VFS_CAP_REVISION_MASK)
605 *has_fcap = true;
606
607 CAP_FOR_EACH_U32(i) {
608 __u32 permitted = caps->permitted.cap[i];
609 __u32 inheritable = caps->inheritable.cap[i];
610
611
612
613
614
615 new->cap_permitted.cap[i] =
616 (new->cap_bset.cap[i] & permitted) |
617 (new->cap_inheritable.cap[i] & inheritable);
618
619 if (permitted & ~new->cap_permitted.cap[i])
620
621 ret = -EPERM;
622 }
623
624
625
626
627
628
629 return *effective ? ret : 0;
630}
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
648 const struct dentry *dentry,
649 struct cpu_vfs_cap_data *cpu_caps)
650{
651 struct inode *inode = d_backing_inode(dentry);
652 __u32 magic_etc;
653 unsigned tocopy, i;
654 int size;
655 struct vfs_ns_cap_data data, *nscaps = &data;
656 struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
657 kuid_t rootkuid;
658 struct user_namespace *fs_ns;
659
660 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
661
662 if (!inode)
663 return -ENODATA;
664
665 fs_ns = inode->i_sb->s_user_ns;
666 size = __vfs_getxattr((struct dentry *)dentry, inode,
667 XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
668 if (size == -ENODATA || size == -EOPNOTSUPP)
669
670 return -ENODATA;
671
672 if (size < 0)
673 return size;
674
675 if (size < sizeof(magic_etc))
676 return -EINVAL;
677
678 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
679
680 rootkuid = make_kuid(fs_ns, 0);
681 switch (magic_etc & VFS_CAP_REVISION_MASK) {
682 case VFS_CAP_REVISION_1:
683 if (size != XATTR_CAPS_SZ_1)
684 return -EINVAL;
685 tocopy = VFS_CAP_U32_1;
686 break;
687 case VFS_CAP_REVISION_2:
688 if (size != XATTR_CAPS_SZ_2)
689 return -EINVAL;
690 tocopy = VFS_CAP_U32_2;
691 break;
692 case VFS_CAP_REVISION_3:
693 if (size != XATTR_CAPS_SZ_3)
694 return -EINVAL;
695 tocopy = VFS_CAP_U32_3;
696 rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
697 break;
698
699 default:
700 return -EINVAL;
701 }
702
703
704
705 rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid);
706 if (!rootid_owns_currentns(rootkuid))
707 return -ENODATA;
708
709 CAP_FOR_EACH_U32(i) {
710 if (i >= tocopy)
711 break;
712 cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
713 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
714 }
715
716 cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
717 cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
718
719 cpu_caps->rootid = rootkuid;
720
721 return 0;
722}
723
724
725
726
727
728
729static int get_file_caps(struct linux_binprm *bprm, struct file *file,
730 bool *effective, bool *has_fcap)
731{
732 int rc = 0;
733 struct cpu_vfs_cap_data vcaps;
734
735 cap_clear(bprm->cred->cap_permitted);
736
737 if (!file_caps_enabled)
738 return 0;
739
740 if (!mnt_may_suid(file->f_path.mnt))
741 return 0;
742
743
744
745
746
747
748 if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
749 return 0;
750
751 rc = get_vfs_caps_from_disk(file_mnt_user_ns(file),
752 file->f_path.dentry, &vcaps);
753 if (rc < 0) {
754 if (rc == -EINVAL)
755 printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
756 bprm->filename);
757 else if (rc == -ENODATA)
758 rc = 0;
759 goto out;
760 }
761
762 rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
763
764out:
765 if (rc)
766 cap_clear(bprm->cred->cap_permitted);
767
768 return rc;
769}
770
771static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
772
773static inline bool __is_real(kuid_t uid, struct cred *cred)
774{ return uid_eq(cred->uid, uid); }
775
776static inline bool __is_eff(kuid_t uid, struct cred *cred)
777{ return uid_eq(cred->euid, uid); }
778
779static inline bool __is_suid(kuid_t uid, struct cred *cred)
780{ return !__is_real(uid, cred) && __is_eff(uid, cred); }
781
782
783
784
785
786
787
788
789
790
791
792
793
794static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
795 bool *effective, kuid_t root_uid)
796{
797 const struct cred *old = current_cred();
798 struct cred *new = bprm->cred;
799
800 if (!root_privileged())
801 return;
802
803
804
805
806
807 if (has_fcap && __is_suid(root_uid, new)) {
808 warn_setuid_and_fcaps_mixed(bprm->filename);
809 return;
810 }
811
812
813
814
815
816 if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
817
818 new->cap_permitted = cap_combine(old->cap_bset,
819 old->cap_inheritable);
820 }
821
822
823
824 if (__is_eff(root_uid, new))
825 *effective = true;
826}
827
/*
 * Capability-set comparison helpers.  @field/@target/@source name a
 * cap_* member of struct cred by its suffix (e.g. permitted, ambient).
 * Arguments and expansions are fully parenthesized so the macros stay
 * safe with any pointer expression and in any surrounding expression.
 *
 * __cap_gained(field, target, source): target's set has a capability
 *                                      that source's set lacks.
 * __cap_grew(target, source, cred):    within one cred, set @target has
 *                                      a capability that set @source lacks.
 * __cap_full(field, cred):             cred's set contains CAP_FULL_SET.
 */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
834
835static inline bool __is_setuid(struct cred *new, const struct cred *old)
836{ return !uid_eq(new->euid, old->uid); }
837
838static inline bool __is_setgid(struct cred *new, const struct cred *old)
839{ return !gid_eq(new->egid, old->gid); }
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
859 kuid_t root, bool has_fcap)
860{
861 bool ret = false;
862
863 if ((__cap_grew(effective, ambient, new) &&
864 !(__cap_full(effective, new) &&
865 (__is_eff(root, new) || __is_real(root, new)) &&
866 root_privileged())) ||
867 (root_privileged() &&
868 __is_suid(root, new) &&
869 !__cap_full(effective, new)) ||
870 (!__is_setuid(new, old) &&
871 ((has_fcap &&
872 __cap_gained(permitted, new, old)) ||
873 __cap_gained(ambient, new, old))))
874
875 ret = true;
876
877 return ret;
878}
879
880
881
882
883
884
885
886
887
888
889
890
891int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file)
892{
893
894 const struct cred *old = current_cred();
895 struct cred *new = bprm->cred;
896 bool effective = false, has_fcap = false, is_setid;
897 int ret;
898 kuid_t root_uid;
899
900 if (WARN_ON(!cap_ambient_invariant_ok(old)))
901 return -EPERM;
902
903 ret = get_file_caps(bprm, file, &effective, &has_fcap);
904 if (ret < 0)
905 return ret;
906
907 root_uid = make_kuid(new->user_ns, 0);
908
909 handle_privileged_root(bprm, has_fcap, &effective, root_uid);
910
911
912 if (__cap_gained(permitted, new, old))
913 bprm->per_clear |= PER_CLEAR_ON_SETID;
914
915
916
917
918
919
920 is_setid = __is_setuid(new, old) || __is_setgid(new, old);
921
922 if ((is_setid || __cap_gained(permitted, new, old)) &&
923 ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
924 !ptracer_capable(current, new->user_ns))) {
925
926 if (!ns_capable(new->user_ns, CAP_SETUID) ||
927 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
928 new->euid = new->uid;
929 new->egid = new->gid;
930 }
931 new->cap_permitted = cap_intersect(new->cap_permitted,
932 old->cap_permitted);
933 }
934
935 new->suid = new->fsuid = new->euid;
936 new->sgid = new->fsgid = new->egid;
937
938
939 if (has_fcap || is_setid)
940 cap_clear(new->cap_ambient);
941
942
943
944
945
946 new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
947
948
949
950
951
952 if (effective)
953 new->cap_effective = new->cap_permitted;
954 else
955 new->cap_effective = new->cap_ambient;
956
957 if (WARN_ON(!cap_ambient_invariant_ok(new)))
958 return -EPERM;
959
960 if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
961 ret = audit_log_bprm_fcaps(bprm, new, old);
962 if (ret < 0)
963 return ret;
964 }
965
966 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
967
968 if (WARN_ON(!cap_ambient_invariant_ok(new)))
969 return -EPERM;
970
971
972 if (is_setid ||
973 (!__is_real(root_uid, new) &&
974 (effective ||
975 __cap_grew(permitted, ambient, new))))
976 bprm->secureexec = 1;
977
978 return 0;
979}
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995int cap_inode_setxattr(struct dentry *dentry, const char *name,
996 const void *value, size_t size, int flags)
997{
998 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
999
1000
1001 if (strncmp(name, XATTR_SECURITY_PREFIX,
1002 XATTR_SECURITY_PREFIX_LEN) != 0)
1003 return 0;
1004
1005
1006
1007
1008
1009 if (strcmp(name, XATTR_NAME_CAPS) == 0)
1010 return 0;
1011
1012 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1013 return -EPERM;
1014 return 0;
1015}
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036int cap_inode_removexattr(struct user_namespace *mnt_userns,
1037 struct dentry *dentry, const char *name)
1038{
1039 struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1040
1041
1042 if (strncmp(name, XATTR_SECURITY_PREFIX,
1043 XATTR_SECURITY_PREFIX_LEN) != 0)
1044 return 0;
1045
1046 if (strcmp(name, XATTR_NAME_CAPS) == 0) {
1047
1048 struct inode *inode = d_backing_inode(dentry);
1049 if (!inode)
1050 return -EINVAL;
1051 if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
1052 return -EPERM;
1053 return 0;
1054 }
1055
1056 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1057 return -EPERM;
1058 return 0;
1059}
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
1091{
1092 kuid_t root_uid = make_kuid(old->user_ns, 0);
1093
1094 if ((uid_eq(old->uid, root_uid) ||
1095 uid_eq(old->euid, root_uid) ||
1096 uid_eq(old->suid, root_uid)) &&
1097 (!uid_eq(new->uid, root_uid) &&
1098 !uid_eq(new->euid, root_uid) &&
1099 !uid_eq(new->suid, root_uid))) {
1100 if (!issecure(SECURE_KEEP_CAPS)) {
1101 cap_clear(new->cap_permitted);
1102 cap_clear(new->cap_effective);
1103 }
1104
1105
1106
1107
1108
1109
1110 cap_clear(new->cap_ambient);
1111 }
1112 if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1113 cap_clear(new->cap_effective);
1114 if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1115 new->cap_effective = new->cap_permitted;
1116}
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
1130{
1131 switch (flags) {
1132 case LSM_SETID_RE:
1133 case LSM_SETID_ID:
1134 case LSM_SETID_RES:
1135
1136
1137 if (!issecure(SECURE_NO_SETUID_FIXUP))
1138 cap_emulate_setxuid(new, old);
1139 break;
1140
1141 case LSM_SETID_FS:
1142
1143
1144
1145
1146
1147
1148 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
1149 kuid_t root_uid = make_kuid(old->user_ns, 0);
1150 if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1151 new->cap_effective =
1152 cap_drop_fs_set(new->cap_effective);
1153
1154 if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1155 new->cap_effective =
1156 cap_raise_fs_set(new->cap_effective,
1157 new->cap_permitted);
1158 }
1159 break;
1160
1161 default:
1162 return -EINVAL;
1163 }
1164
1165 return 0;
1166}
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178static int cap_safe_nice(struct task_struct *p)
1179{
1180 int is_subset, ret = 0;
1181
1182 rcu_read_lock();
1183 is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1184 current_cred()->cap_permitted);
1185 if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1186 ret = -EPERM;
1187 rcu_read_unlock();
1188
1189 return ret;
1190}
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
/**
 * cap_task_setscheduler - Check whether current may change @p's scheduler
 * @p: task to be affected
 *
 * Return: the verdict of cap_safe_nice(): 0 if allowed, -EPERM otherwise.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	return cap_safe_nice(p);
}
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
/**
 * cap_task_setioprio - Check whether current may change @p's I/O priority
 * @p: task to be affected
 * @ioprio: requested priority (not examined here)
 *
 * Return: the verdict of cap_safe_nice(): 0 if allowed, -EPERM otherwise.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	return cap_safe_nice(p);
}
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
/**
 * cap_task_setnice - Check whether current may change @p's nice value
 * @p: task to be affected
 * @nice: requested nice value (not examined here)
 *
 * Return: the verdict of cap_safe_nice(): 0 if allowed, -EPERM otherwise.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	return cap_safe_nice(p);
}
1235
1236
1237
1238
1239
1240static int cap_prctl_drop(unsigned long cap)
1241{
1242 struct cred *new;
1243
1244 if (!ns_capable(current_user_ns(), CAP_SETPCAP))
1245 return -EPERM;
1246 if (!cap_valid(cap))
1247 return -EINVAL;
1248
1249 new = prepare_creds();
1250 if (!new)
1251 return -ENOMEM;
1252 cap_lower(new->cap_bset, cap);
1253 return commit_creds(new);
1254}
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1272 unsigned long arg4, unsigned long arg5)
1273{
1274 const struct cred *old = current_cred();
1275 struct cred *new;
1276
1277 switch (option) {
1278 case PR_CAPBSET_READ:
1279 if (!cap_valid(arg2))
1280 return -EINVAL;
1281 return !!cap_raised(old->cap_bset, arg2);
1282
1283 case PR_CAPBSET_DROP:
1284 return cap_prctl_drop(arg2);
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305 case PR_SET_SECUREBITS:
1306 if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
1307 & (old->securebits ^ arg2))
1308 || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))
1309 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))
1310 || (cap_capable(current_cred(),
1311 current_cred()->user_ns,
1312 CAP_SETPCAP,
1313 CAP_OPT_NONE) != 0)
1314
1315
1316
1317
1318
1319
1320
1321 )
1322
1323 return -EPERM;
1324
1325 new = prepare_creds();
1326 if (!new)
1327 return -ENOMEM;
1328 new->securebits = arg2;
1329 return commit_creds(new);
1330
1331 case PR_GET_SECUREBITS:
1332 return old->securebits;
1333
1334 case PR_GET_KEEPCAPS:
1335 return !!issecure(SECURE_KEEP_CAPS);
1336
1337 case PR_SET_KEEPCAPS:
1338 if (arg2 > 1)
1339 return -EINVAL;
1340 if (issecure(SECURE_KEEP_CAPS_LOCKED))
1341 return -EPERM;
1342
1343 new = prepare_creds();
1344 if (!new)
1345 return -ENOMEM;
1346 if (arg2)
1347 new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1348 else
1349 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1350 return commit_creds(new);
1351
1352 case PR_CAP_AMBIENT:
1353 if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1354 if (arg3 | arg4 | arg5)
1355 return -EINVAL;
1356
1357 new = prepare_creds();
1358 if (!new)
1359 return -ENOMEM;
1360 cap_clear(new->cap_ambient);
1361 return commit_creds(new);
1362 }
1363
1364 if (((!cap_valid(arg3)) | arg4 | arg5))
1365 return -EINVAL;
1366
1367 if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1368 return !!cap_raised(current_cred()->cap_ambient, arg3);
1369 } else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1370 arg2 != PR_CAP_AMBIENT_LOWER) {
1371 return -EINVAL;
1372 } else {
1373 if (arg2 == PR_CAP_AMBIENT_RAISE &&
1374 (!cap_raised(current_cred()->cap_permitted, arg3) ||
1375 !cap_raised(current_cred()->cap_inheritable,
1376 arg3) ||
1377 issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1378 return -EPERM;
1379
1380 new = prepare_creds();
1381 if (!new)
1382 return -ENOMEM;
1383 if (arg2 == PR_CAP_AMBIENT_RAISE)
1384 cap_raise(new->cap_ambient, arg3);
1385 else
1386 cap_lower(new->cap_ambient, arg3);
1387 return commit_creds(new);
1388 }
1389
1390 default:
1391
1392 return -ENOSYS;
1393 }
1394}
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1407{
1408 int cap_sys_admin = 0;
1409
1410 if (cap_capable(current_cred(), &init_user_ns,
1411 CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
1412 cap_sys_admin = 1;
1413
1414 return cap_sys_admin;
1415}
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427int cap_mmap_addr(unsigned long addr)
1428{
1429 int ret = 0;
1430
1431 if (addr < dac_mmap_min_addr) {
1432 ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1433 CAP_OPT_NONE);
1434
1435 if (ret == 0)
1436 current->flags |= PF_SUPERPRIV;
1437 }
1438 return ret;
1439}
1440
/**
 * cap_mmap_file - mmap_file hook; imposes no restriction
 * @file: file being mapped (not examined here)
 * @reqprot: requested protection (not examined here)
 * @prot: protection to be applied (not examined here)
 * @flags: mapping flags (not examined here)
 *
 * Return: always 0.
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	return 0;
}
1446
#ifdef CONFIG_SECURITY

/*
 * Table binding each LSM hook name to the cap_* function in this file
 * that implements it.  Only built when CONFIG_SECURITY is enabled.
 */
static struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
 LSM_HOOK_INIT(capable, cap_capable),
 LSM_HOOK_INIT(settime, cap_settime),
 LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
 LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
 LSM_HOOK_INIT(capget, cap_capget),
 LSM_HOOK_INIT(capset, cap_capset),
 LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
 LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
 LSM_HOOK_INIT(mmap_file, cap_mmap_file),
 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
 LSM_HOOK_INIT(task_prctl, cap_task_prctl),
 LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
 LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
 LSM_HOOK_INIT(task_setnice, cap_task_setnice),
 LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
};

/* Register the hook table above under the LSM name "capability". */
static int __init capability_init(void)
{
 security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
 "capability");
 return 0;
}

/* LSM descriptor: named "capability", ordered LSM_ORDER_FIRST. */
DEFINE_LSM(capability) = {
 .name = "capability",
 .order = LSM_ORDER_FIRST,
 .init = capability_init,
};

#endif
1484