linux/mm/util.c
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>

#include <asm/sections.h>
#include <asm/uaccess.h>

#include "internal.h"

static inline int is_kernel_rodata(unsigned long addr)
{
        return addr >= (unsigned long)__start_rodata &&
                addr < (unsigned long)__end_rodata;
}

/**
 * kfree_const - conditionally free memory
 * @x: pointer to the memory
 *
 * Calls kfree() only if @x is not in the .rodata section.
 */
void kfree_const(const void *x)
{
        if (!is_kernel_rodata((unsigned long)x))
                kfree(x);
}
EXPORT_SYMBOL(kfree_const);

/**
 * kstrdup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrdup(const char *s, gfp_t gfp)
{
        size_t len;
        char *buf;

        if (!s)
                return NULL;

        len = strlen(s) + 1;
        buf = kmalloc_track_caller(len, gfp);
        if (buf)
                memcpy(buf, s, len);
        return buf;
}
EXPORT_SYMBOL(kstrdup);

/**
 * kstrdup_const - conditionally duplicate an existing const string
 * @s: the string to duplicate
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 *
 * Returns the source string if it is in the .rodata section; otherwise it
 * falls back to kstrdup().
 * Strings allocated by kstrdup_const() should be freed by kfree_const().
 */
const char *kstrdup_const(const char *s, gfp_t gfp)
{
        if (is_kernel_rodata((unsigned long)s))
                return s;

        return kstrdup(s, gfp);
}
EXPORT_SYMBOL(kstrdup_const);
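
/*
 * Illustrative sketch, not part of the original file: a caller that saves a
 * possibly-.rodata name with kstrdup_const() must release it with
 * kfree_const(), never plain kfree().  struct example_obj and the two
 * example_* helpers below are hypothetical.
 */
struct example_obj {
        const char *name;
};

static int __maybe_unused example_obj_set_name(struct example_obj *obj,
                                               const char *name)
{
        obj->name = kstrdup_const(name, GFP_KERNEL);
        return obj->name ? 0 : -ENOMEM;
}

static void __maybe_unused example_obj_free_name(struct example_obj *obj)
{
        /* frees only if the string was actually duplicated */
        kfree_const(obj->name);
        obj->name = NULL;
}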

/**
 * kstrndup - allocate space for and copy an existing string
 * @s: the string to duplicate
 * @max: read at most @max chars from @s
 * @gfp: the GFP mask used in the kmalloc() call when allocating memory
 */
char *kstrndup(const char *s, size_t max, gfp_t gfp)
{
        size_t len;
        char *buf;

        if (!s)
                return NULL;

        len = strnlen(s, max);
        buf = kmalloc_track_caller(len+1, gfp);
        if (buf) {
                memcpy(buf, s, len);
                buf[len] = '\0';
        }
        return buf;
}
EXPORT_SYMBOL(kstrndup);

/**
 * kmemdup - duplicate region of memory
 *
 * @src: memory region to duplicate
 * @len: memory region length
 * @gfp: GFP mask to use
 */
void *kmemdup(const void *src, size_t len, gfp_t gfp)
{
        void *p;

        p = kmalloc_track_caller(len, gfp);
        if (p)
                memcpy(p, src, len);
        return p;
}
EXPORT_SYMBOL(kmemdup);

/**
 * memdup_user - duplicate memory region from user space
 *
 * @src: source address in user space
 * @len: number of bytes to copy
 *
 * Returns an ERR_PTR() on failure.
 */
void *memdup_user(const void __user *src, size_t len)
{
        void *p;

        /*
         * Always use GFP_KERNEL, since copy_from_user() can sleep and
         * cause a page fault, which makes it pointless to use GFP_NOFS
         * or GFP_ATOMIC.
         */
        p = kmalloc_track_caller(len, GFP_KERNEL);
        if (!p)
                return ERR_PTR(-ENOMEM);

        if (copy_from_user(p, src, len)) {
                kfree(p);
                return ERR_PTR(-EFAULT);
        }

        return p;
}
EXPORT_SYMBOL(memdup_user);
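
/*
 * Illustrative sketch, not part of the original file: unlike kmemdup(),
 * which returns NULL on failure, memdup_user() returns an ERR_PTR(), so
 * callers test it with IS_ERR() and propagate PTR_ERR().
 * example_copy_from_user_arg() is a hypothetical helper.
 */
static long __maybe_unused example_copy_from_user_arg(const void __user *arg,
                                                      size_t len)
{
        void *buf = memdup_user(arg, len);

        if (IS_ERR(buf))
                return PTR_ERR(buf);    /* -ENOMEM or -EFAULT */

        /* ... work with the kernel copy in buf ... */

        kfree(buf);
        return 0;
}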

/*
 * strndup_user - duplicate an existing string from user space
 * @s: The string to duplicate
 * @n: Maximum number of bytes to copy, including the trailing NUL.
 */
char *strndup_user(const char __user *s, long n)
{
        char *p;
        long length;

        length = strnlen_user(s, n);

        if (!length)
                return ERR_PTR(-EFAULT);

        if (length > n)
                return ERR_PTR(-EINVAL);

        p = memdup_user(s, length);

        if (IS_ERR(p))
                return p;

        p[length - 1] = '\0';

        return p;
}
EXPORT_SYMBOL(strndup_user);

void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct rb_node *rb_parent)
{
        struct vm_area_struct *next;

        vma->vm_prev = prev;
        if (prev) {
                next = prev->vm_next;
                prev->vm_next = vma;
        } else {
                mm->mmap = vma;
                if (rb_parent)
                        next = rb_entry(rb_parent,
                                        struct vm_area_struct, vm_rb);
                else
                        next = NULL;
        }
        vma->vm_next = next;
        if (next)
                next->vm_prev = vma;
}

/* Check if the vma is being used as a stack by this task */
static int vm_is_stack_for_task(struct task_struct *t,
                                struct vm_area_struct *vma)
{
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}

/*
 * Check if the vma is being used as a stack.
 * If @in_group is true, check the entire thread group; otherwise just
 * check the current task. Returns the task_struct of the task that the
 * vma is a stack for. Must be called under rcu_read_lock().
 */
struct task_struct *task_of_stack(struct task_struct *task,
                                struct vm_area_struct *vma, bool in_group)
{
        if (vm_is_stack_for_task(task, vma))
                return task;

        if (in_group) {
                struct task_struct *t;

                for_each_thread(task, t) {
                        if (vm_is_stack_for_task(t, vma))
                                return t;
                }
        }

        return NULL;
}

#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm)
{
        mm->mmap_base = TASK_UNMAPPED_BASE;
        mm->get_unmapped_area = arch_get_unmapped_area;
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * If the architecture does not support this function, simply return with no
 * pages pinned.
 */
int __weak __get_user_pages_fast(unsigned long start,
                                 int nr_pages, int write, struct page **pages)
{
        return 0;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:      starting user address
 * @nr_pages:   number of pages from start to pin
 * @write:      whether pages will be written to
 * @pages:      array that receives pointers to the pages pinned.
 *              Should be at least nr_pages long.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 *
 * get_user_pages_fast provides equivalent functionality to get_user_pages,
 * operating on current and current->mm, with force=0 and vma=NULL. However
 * unlike get_user_pages, it must be called without mmap_sem held.
 *
 * get_user_pages_fast may take mmap_sem and page table locks, so no
 * assumptions can be made about lack of locking. get_user_pages_fast is to be
 * implemented in a way that is advantageous (vs get_user_pages()) when the
 * user memory area is already faulted in and present in ptes. However if the
 * pages have to be faulted in, it may turn out to be slightly slower so
 * callers need to carefully consider what to use. On many architectures,
 * get_user_pages_fast simply falls back to get_user_pages.
 */
int __weak get_user_pages_fast(unsigned long start,
                                int nr_pages, int write, struct page **pages)
{
        struct mm_struct *mm = current->mm;
        return get_user_pages_unlocked(current, mm, start, nr_pages,
                                       write, 0, pages);
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
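
/*
 * Illustrative sketch, not part of the original file: a typical caller pins
 * a user range without holding mmap_sem, works on the pages, and drops the
 * extra references with put_page().  example_pin_user_range() is a
 * hypothetical helper.
 */
static int __maybe_unused example_pin_user_range(unsigned long start,
                                                 int nr_pages,
                                                 struct page **pages)
{
        int i, pinned;

        pinned = get_user_pages_fast(start, nr_pages, 1 /* write */, pages);
        if (pinned <= 0)
                return pinned;  /* zero pages pinned, or -errno */

        /* ... access the pinned pages here ... */

        for (i = 0; i < pinned; i++)
                put_page(pages[i]);

        return pinned;
}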

unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long pgoff)
{
        unsigned long ret;
        struct mm_struct *mm = current->mm;
        unsigned long populate;

        ret = security_mmap_file(file, prot, flag);
        if (!ret) {
                down_write(&mm->mmap_sem);
                ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
                                    &populate);
                up_write(&mm->mmap_sem);
                if (populate)
                        mm_populate(ret, populate);
        }
        return ret;
}

unsigned long vm_mmap(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long offset)
{
        if (unlikely(offset + PAGE_ALIGN(len) < offset))
                return -EINVAL;
        if (unlikely(offset & ~PAGE_MASK))
                return -EINVAL;

        return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
}
EXPORT_SYMBOL(vm_mmap);

void kvfree(const void *addr)
{
        if (is_vmalloc_addr(addr))
                vfree(addr);
        else
                kfree(addr);
}
EXPORT_SYMBOL(kvfree);
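
/*
 * Illustrative sketch, not part of the original file: kvfree() exists so
 * that callers using the common "try kmalloc(), fall back to vmalloc()"
 * pattern do not have to track which allocator succeeded.
 * example_alloc_big_buffer() is a hypothetical helper.
 */
static void *example_alloc_big_buffer(size_t size)
{
        void *p = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

        if (!p)
                p = vmalloc(size);

        /* the buffer is released with kvfree() in either case */
        return p;
}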

static inline void *__page_rmapping(struct page *page)
{
        unsigned long mapping;

        mapping = (unsigned long)page->mapping;
        mapping &= ~PAGE_MAPPING_FLAGS;

        return (void *)mapping;
}

/* Neutral page->mapping pointer to address_space or anon_vma or other */
void *page_rmapping(struct page *page)
{
        page = compound_head(page);
        return __page_rmapping(page);
}

struct anon_vma *page_anon_vma(struct page *page)
{
        unsigned long mapping;

        page = compound_head(page);
        mapping = (unsigned long)page->mapping;
        if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                return NULL;
        return __page_rmapping(page);
}

struct address_space *page_mapping(struct page *page)
{
        unsigned long mapping;

        /* This happens if someone calls flush_dcache_page on slab page */
        if (unlikely(PageSlab(page)))
                return NULL;

        if (unlikely(PageSwapCache(page))) {
                swp_entry_t entry;

                entry.val = page_private(page);
                return swap_address_space(entry);
        }

        mapping = (unsigned long)page->mapping;
        if (mapping & PAGE_MAPPING_FLAGS)
                return NULL;
        return page->mapping;
}

int overcommit_ratio_handler(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp,
                             loff_t *ppos)
{
        int ret;

        ret = proc_dointvec(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                sysctl_overcommit_kbytes = 0;
        return ret;
}

int overcommit_kbytes_handler(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp,
                             loff_t *ppos)
{
        int ret;

        ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                sysctl_overcommit_ratio = 0;
        return ret;
}

/*
 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
 */
unsigned long vm_commit_limit(void)
{
        unsigned long allowed;

        if (sysctl_overcommit_kbytes)
                allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
        else
                allowed = ((totalram_pages - hugetlb_total_pages())
                           * sysctl_overcommit_ratio / 100);
        allowed += total_swap_pages;

        return allowed;
}
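
/*
 * Worked example (illustrative, not from the original file): with 4 GiB of
 * RAM (1048576 pages of 4 KiB), no hugetlb pages, the default
 * overcommit_ratio of 50 and 1 GiB of swap (262144 pages), the
 * OVERCOMMIT_NEVER limit is 1048576 * 50 / 100 + 262144 = 786432 pages,
 * i.e. 3 GiB of committable memory.
 */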

/**
 * get_cmdline() - copy the cmdline value to a buffer.
 * @task:     the task whose cmdline value to copy.
 * @buffer:   the buffer to copy to.
 * @buflen:   the length of the buffer. Larger cmdline values are truncated
 *            to this length.
 * Returns the size of the cmdline field copied. Note that the copy does
 * not guarantee a terminating NUL byte.
 */
int get_cmdline(struct task_struct *task, char *buffer, int buflen)
{
        int res = 0;
        unsigned int len;
        struct mm_struct *mm = get_task_mm(task);
        if (!mm)
                goto out;
        if (!mm->arg_end)
                goto out_mm;    /* Shh! No looking before we're done */

        len = mm->arg_end - mm->arg_start;

        if (len > buflen)
                len = buflen;

        res = access_process_vm(task, mm->arg_start, buffer, len, 0);

        /*
         * If the nul at the end of args has been overwritten, then
         * assume application is using setproctitle(3).
         */
        if (res > 0 && buffer[res-1] != '\0' && len < buflen) {
                len = strnlen(buffer, res);
                if (len < res) {
                        res = len;
                } else {
                        len = mm->env_end - mm->env_start;
                        if (len > buflen - res)
                                len = buflen - res;
                        res += access_process_vm(task, mm->env_start,
                                                 buffer+res, len, 0);
                        res = strnlen(buffer, res);
                }
        }
out_mm:
        mmput(mm);
out:
        return res;
}
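
/*
 * Illustrative sketch, not part of the original file: get_cmdline() does not
 * guarantee a terminating NUL, and the arguments it copies are NUL-separated,
 * so a caller that wants a printable string must terminate the buffer itself
 * (printing with %s then shows only argv[0]).  example_log_cmdline() is a
 * hypothetical helper.
 */
static void __maybe_unused example_log_cmdline(struct task_struct *task)
{
        char buf[128];
        int len = get_cmdline(task, buf, sizeof(buf) - 1);

        if (len < 0)
                len = 0;
        buf[len] = '\0';
        pr_info("cmdline: %s\n", buf);
}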