#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task_stack.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/userfaultfd_k.h>

#include <linux/uaccess.h>

#include "internal.h"

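/**
 * kfree_const - conditionally free memory
 * @x: pointer to the memory
 *
 * Function calls kfree only if @x is not in .rodata section.
 */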
void kfree_const(const void *x)
{
	if (!is_kernel_rodata((unsigned long)x))
		kfree(x);
}
EXPORT_SYMBOL(kfree_const);

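/**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Return: newly allocated copy of @s or %NULL in case of error
 */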
char *kstrdup(const char *s, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strlen(s) + 1;
	buf = kmalloc_track_caller(len, gfp);
	if (buf)
		memcpy(buf, s, len);
	return buf;
}
EXPORT_SYMBOL(kstrdup);

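/**
 * kstrdup_const - conditionally duplicate an existing const string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Note: Strings allocated by kstrdup_const should be freed by kfree_const.
 *
 * Return: source string if it is in .rodata section, otherwise
 * fallback to kstrdup.
 */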
const char *kstrdup_const(const char *s, gfp_t gfp)
{
	if (is_kernel_rodata((unsigned long)s))
		return s;

	return kstrdup(s, gfp);
}
EXPORT_SYMBOL(kstrdup_const);

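/**
 * kstrndup - allocate space for and copy an existing string, at most
 *	      @max characters long
 * @s: the string to duplicate
 * @max: read at most @max chars from @s
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Note: Use kmemdup_nul() instead if the size is known exactly.
 *
 * Return: newly allocated copy of @s or %NULL in case of error
 */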
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
	size_t len;
	char *buf;

	if (!s)
		return NULL;

	len = strnlen(s, max);
	buf = kmalloc_track_caller(len+1, gfp);
	if (buf) {
		memcpy(buf, s, len);
		buf[len] = '\0';
	}
	return buf;
}
EXPORT_SYMBOL(kstrndup);

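/**
 * kmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 *
 * Return: newly allocated copy of @src or %NULL in case of error
 */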
void *kmemdup(const void *src, size_t len, gfp_t gfp)
{
	void *p;

	p = kmalloc_track_caller(len, gfp);
	if (p)
		memcpy(p, src, len);
	return p;
}
EXPORT_SYMBOL(kmemdup);

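/**
 * kmemdup_nul - Create a NUL-terminated string from unterminated data
 * @s: The data to stringify
 * @len: The size of the data
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Return: newly allocated copy of @s with NUL-termination or %NULL in
 * case of error
 */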
char *kmemdup_nul(const char *s, size_t len, gfp_t gfp)
{
	char *buf;

	if (!s)
		return NULL;

	buf = kmalloc_track_caller(len + 1, gfp);
	if (buf) {
		memcpy(buf, s, len);
		buf[len] = '\0';
	}
	return buf;
}
EXPORT_SYMBOL(kmemdup_nul);

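/**
 * memdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Return: an ERR_PTR() on failure.  Result is physically
 * contiguous, to be freed by kfree().
 */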
void *memdup_user(const void __user *src, size_t len)
{
	void *p;

	p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(memdup_user);

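/**
 * vmemdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Return: an ERR_PTR() on failure.  Result may be not
 * physically contiguous.  Use kvfree() to free.
 */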
void *vmemdup_user(const void __user *src, size_t len)
{
	void *p;

	p = kvmalloc(len, GFP_USER);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kvfree(p);
		return ERR_PTR(-EFAULT);
	}

	return p;
}
EXPORT_SYMBOL(vmemdup_user);

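/**
 * strndup_user - duplicate an existing string from user space
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 *
 * Return: newly allocated copy of @s or an ERR_PTR() in case of error
 */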
char *strndup_user(const char __user *s, long n)
{
	char *p;
	long length;

	length = strnlen_user(s, n);

	if (!length)
		return ERR_PTR(-EFAULT);

	if (length > n)
		return ERR_PTR(-EINVAL);

	p = memdup_user(s, length);

	if (IS_ERR(p))
		return p;

	p[length - 1] = '\0';

	return p;
}
EXPORT_SYMBOL(strndup_user);

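/**
 * memdup_user_nul - duplicate memory region from user space and NUL-terminate
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Return: an ERR_PTR() on failure.
 */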
void *memdup_user_nul(const void __user *src, size_t len)
{
	char *p;

	/*
	 * Always use GFP_KERNEL, since copy_from_user() can sleep and
	 * cause pagefaults, which makes it pointless to use GFP_NOFS
	 * or GFP_ATOMIC.
	 */
	p = kmalloc_track_caller(len + 1, GFP_KERNEL);
	if (!p)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(p, src, len)) {
		kfree(p);
		return ERR_PTR(-EFAULT);
	}
	p[len] = '\0';

	return p;
}
EXPORT_SYMBOL(memdup_user_nul);

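/*
 * Link @vma into @mm's VMA list after @prev (or at the list head when
 * @prev is NULL), fixing up the vm_next/vm_prev pointers of its
 * neighbours; @rb_parent supplies the successor when there is no @prev.
 */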
void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent)
{
	struct vm_area_struct *next;

	vma->vm_prev = prev;
	if (prev) {
		next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			next = rb_entry(rb_parent,
					struct vm_area_struct, vm_rb);
		else
			next = NULL;
	}
	vma->vm_next = next;
	if (next)
		next->vm_prev = vma;
}

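/* Check if the vma is being used as a stack by this task */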
int vma_is_stack_for_current(struct vm_area_struct *vma)
{
	struct task_struct * __maybe_unused t = current;

	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}

#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
	mm->mmap_base = TASK_UNMAPPED_BASE;
	mm->get_unmapped_area = arch_get_unmapped_area;
}
#endif

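/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * Note a difference with get_user_pages_fast: this always returns the
 * number of pages pinned, 0 if no pages were pinned.
 * If the architecture does not support this function, simply return with no
 * pages pinned.
 */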
int __weak __get_user_pages_fast(unsigned long start,
				 int nr_pages, int write, struct page **pages)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);

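/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * get_user_pages_fast provides equivalent functionality to get_user_pages,
 * operating on current and current->mm. Architectures may provide an
 * optimized implementation that avoids taking mmap_sem when the page
 * tables already hold the required mappings; this weak default simply
 * falls back to get_user_pages_unlocked().
 *
 * Return: number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0; if no pages were
 * pinned, returns -errno.
 */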
int __weak get_user_pages_fast(unsigned long start,
				int nr_pages, unsigned int gup_flags,
				struct page **pages)
{
	return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);

unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
	unsigned long ret;
	struct mm_struct *mm = current->mm;
	unsigned long populate;
	LIST_HEAD(uf);

	ret = security_mmap_file(file, prot, flag);
	if (!ret) {
		if (down_write_killable(&mm->mmap_sem))
			return -EINTR;
		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
				    &populate, &uf);
		up_write(&mm->mmap_sem);
		userfaultfd_unmap_complete(mm, &uf);
		if (populate)
			mm_populate(ret, populate);
	}
	return ret;
}

unsigned long vm_mmap(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long offset)
{
	if (unlikely(offset + PAGE_ALIGN(len) < offset))
		return -EINVAL;
	if (unlikely(offset_in_page(offset)))
		return -EINVAL;

	return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);

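/**
 * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
 * failure, fall back to non-contiguous (vmalloc) allocation.
 * @size: size of the request.
 * @flags: gfp mask for the allocation - must be compatible (superset) with GFP_KERNEL.
 * @node: numa node to allocate from
 *
 * Uses kmalloc to get the memory but if the allocation fails then falls back
 * to the vmalloc allocator. Use kvfree for freeing the memory.
 *
 * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
 * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
 * preferable to the vmalloc fallback, due to visible performance drawbacks.
 *
 * Return: pointer to the allocated memory or %NULL in case of failure
 */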
void *kvmalloc_node(size_t size, gfp_t flags, int node)
{
	gfp_t kmalloc_flags = flags;
	void *ret;

	/*
	 * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
	 * so the given set of flags has to be compatible.
	 */
	if ((flags & GFP_KERNEL) != GFP_KERNEL)
		return kmalloc_node(size, flags, node);

	/*
	 * We want to attempt a large physically contiguous block first because
	 * it is less likely to fragment multiple larger blocks and therefore
	 * contribute to a long term fragmentation less than vmalloc fallback.
	 * However make sure that larger requests are not too disruptive - no
	 * OOM killer and no allocation failure warnings as we have a fallback.
	 */
	if (size > PAGE_SIZE) {
		kmalloc_flags |= __GFP_NOWARN;

		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
			kmalloc_flags |= __GFP_NORETRY;
	}

	ret = kmalloc_node(size, kmalloc_flags, node);

	/*
	 * It doesn't really make sense to fall back to vmalloc for sub page
	 * requests
	 */
	if (ret || size <= PAGE_SIZE)
		return ret;

	return __vmalloc_node_flags_caller(size, node, flags,
			__builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node);

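/**
 * kvfree() - Free memory.
 * @addr: Pointer to allocated memory.
 *
 * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
 * It is slightly more efficient to use kfree() or vfree() if you are certain
 * that you know which one to use.
 *
 * Context: Either preemptible task context or not-NMI interrupt.
 */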
void kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
EXPORT_SYMBOL(kvfree);

static inline void *__page_rmapping(struct page *page)
{
	unsigned long mapping;

	mapping = (unsigned long)page->mapping;
	mapping &= ~PAGE_MAPPING_FLAGS;

	return (void *)mapping;
}

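/* Neutral page->mapping pointer to address_space or anon_vma or other */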
void *page_rmapping(struct page *page)
{
	page = compound_head(page);
	return __page_rmapping(page);
}

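/*
 * Return true if this page is mapped into pagetables.
 * For compound page it returns true if any subpage of compound page is mapped.
 */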
bool page_mapped(struct page *page)
{
	int i;

	if (likely(!PageCompound(page)))
		return atomic_read(&page->_mapcount) >= 0;
	page = compound_head(page);
	if (atomic_read(compound_mapcount_ptr(page)) >= 0)
		return true;
	if (PageHuge(page))
		return false;
	for (i = 0; i < (1 << compound_order(page)); i++) {
		if (atomic_read(&page[i]._mapcount) >= 0)
			return true;
	}
	return false;
}
EXPORT_SYMBOL(page_mapped);

struct anon_vma *page_anon_vma(struct page *page)
{
	unsigned long mapping;

	page = compound_head(page);
	mapping = (unsigned long)page->mapping;
	if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		return NULL;
	return __page_rmapping(page);
}

struct address_space *page_mapping(struct page *page)
{
	struct address_space *mapping;

	page = compound_head(page);

	/* This happens if someone calls flush_dcache_page on slab page */
	if (unlikely(PageSlab(page)))
		return NULL;

	if (unlikely(PageSwapCache(page))) {
		swp_entry_t entry;

		entry.val = page_private(page);
		return swap_address_space(entry);
	}

	mapping = page->mapping;
	if ((unsigned long)mapping & PAGE_MAPPING_ANON)
		return NULL;

	return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
}
EXPORT_SYMBOL(page_mapping);

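/*
 * For file cache pages, return the address_space, otherwise return NULL
 */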
struct address_space *page_mapping_file(struct page *page)
{
	if (unlikely(PageSwapCache(page)))
		return NULL;
	return page_mapping(page);
}

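/* Slow path of page_mapcount() for compound pages */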
int __page_mapcount(struct page *page)
{
	int ret;

	ret = atomic_read(&page->_mapcount) + 1;
	/*
	 * For file THP page->_mapcount contains total number of mappings
	 * of the page: no need to look into compound_mapcount.
	 */
	if (!PageAnon(page) && !PageHuge(page))
		return ret;
	page = compound_head(page);
	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
	if (PageDoubleMap(page))
		ret--;
	return ret;
}
EXPORT_SYMBOL_GPL(__page_mapcount);

int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

int overcommit_ratio_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_kbytes = 0;
	return ret;
}

int overcommit_kbytes_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		sysctl_overcommit_ratio = 0;
	return ret;
}

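/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */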
unsigned long vm_commit_limit(void)
{
	unsigned long allowed;

	if (sysctl_overcommit_kbytes)
		allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
	else
		allowed = ((totalram_pages() - hugetlb_total_pages())
			   * sysctl_overcommit_ratio / 100);
	allowed += total_swap_pages;

	return allowed;
}

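/*
 * Make sure vm_committed_as in one cacheline and not cacheline shared with
 * other variables. It can be updated by several CPUs frequently.
 */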
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

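/*
 * The amount of memory committed in the system, read from the
 * vm_committed_as percpu counter. A metric like this can be used, for
 * example, to drive ballooning decisions when Linux runs under a
 * hypervisor.
 */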
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);

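/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies: always, guess (the
 * default) and never; see Documentation/vm/overcommit-accounting.rst.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */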
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	long allowed;

	VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
			-(s64)vm_committed_as_batch * num_online_cpus(),
			"memory commitment underflow");

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		if (pages > totalram_pages() + total_swap_pages)
			goto error;
		return 0;
	}

	allowed = vm_commit_limit();
	/*
	 * Reserve some for root
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

	/*
	 * Don't let a single process grow so big a user can't recover
	 */
	if (mm) {
		long reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);

		allowed -= min_t(long, mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}

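/**
 * get_cmdline() - copy the cmdline value to a buffer.
 * @task:   the task whose cmdline value to copy.
 * @buffer: the buffer to copy to.
 * @buflen: the length of the buffer. Larger cmdline values are truncated
 *          to this length.
 *
 * Return: the size of the cmdline field copied. Note that the copy does
 * not guarantee an ending NUL byte.
 */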
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
{
	int res = 0;
	unsigned int len;
	struct mm_struct *mm = get_task_mm(task);
	unsigned long arg_start, arg_end, env_start, env_end;
	if (!mm)
		goto out;
	if (!mm->arg_end)
		goto out_mm;	/* Shh! No looking before we're done */

	spin_lock(&mm->arg_lock);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	spin_unlock(&mm->arg_lock);

	len = arg_end - arg_start;

	if (len > buflen)
		len = buflen;

	res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);

	/*
	 * If the nul at the end of args has been overwritten, then
	 * assume application is using setproctitle(3).
	 */
	if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
		len = strnlen(buffer, res);
		if (len < res) {
			res = len;
		} else {
			len = env_end - env_start;
			if (len > buflen - res)
				len = buflen - res;
			res += access_process_vm(task, env_start,
						 buffer+res, len,
						 FOLL_FORCE);
			res = strnlen(buffer, res);
		}
	}
out_mm:
	mmput(mm);
out:
	return res;
}