linux/include/linux/memremap.h
#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
#include <linux/mm.h>
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>

struct resource;
struct device;

/**
 * struct vmem_altmap - pre-allocated storage for vmemmap_populate
 * @base_pfn: base of the entire dev_pagemap mapping
 * @reserve: pages mapped, but reserved for driver use (relative to @base_pfn)
 * @free: free pages set aside in the mapping for memmap storage
 * @align: pages reserved to meet allocation alignments
 * @alloc: track pages consumed, private to vmemmap_populate()
 */
struct vmem_altmap {
        const unsigned long base_pfn;
        const unsigned long reserve;
        unsigned long free;
        unsigned long align;
        unsigned long alloc;
};

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);

#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
        return NULL;
}
#endif
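
/*
 * Illustrative sketch (not taken from an in-tree driver): a hypothetical
 * pmem-style driver that wants the memmap for its range stored in device
 * memory could describe the reserved portion with a vmem_altmap before
 * calling devm_memremap_pages().  The variable names and field values
 * below are assumptions made only for this example.
 *
 *      struct vmem_altmap altmap = {
 *              .base_pfn = PHYS_PFN(res->start),
 *              .reserve  = label_pfns,   // hypothetical driver-private area
 *              .free     = memmap_pfns,  // pages set aside for memmap storage
 *      };
 *
 *      addr = devm_memremap_pages(dev, res, &ref, &altmap);
 *
 * vmem_altmap_offset() then reports how many pfns (@reserve plus @free) a
 * caller must skip past @base_pfn before the first usable data page.
 */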

/*
 * Specialize ZONE_DEVICE memory into multiple types, each having a different
 * usage.
 *
 * MEMORY_DEVICE_PUBLIC:
 * Persistent device memory (pmem): struct page might be allocated in different
 * memory and the architecture might want to perform special actions. It is
 * similar to regular memory, in that the CPU can access it transparently.
 * However, it is likely to have different bandwidth and latency than regular
 * memory. See Documentation/nvdimm/nvdimm.txt for more information.
 *
 * MEMORY_HMM:
 * Device memory that is not directly addressable by the CPU: the CPU can
 * neither read nor write this unaddressable memory. In this case, we do still
 * have struct pages backing the device memory. Doing so simplifies the
 * implementation, but it is important to remember that there are certain
 * points at which the struct page must be treated as an opaque object, rather
 * than a "normal" struct page.
 * A more complete discussion of unaddressable memory may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.txt.
 */
enum memory_type {
        MEMORY_DEVICE_PUBLIC = 0,
        MEMORY_HMM,
};

/*
 * For MEMORY_HMM we use ZONE_DEVICE and extend it with two callbacks:
 *   page_fault()
 *   page_free()
 *
 * Additional notes about MEMORY_HMM may be found in include/linux/hmm.h and
 * Documentation/vm/hmm.txt. There is also a brief explanation in
 * include/linux/memory_hotplug.h.
 *
 * The page_fault() callback must migrate the page back, from device memory to
 * system memory, so that the CPU can access it. This might fail for various
 * reasons (device issues, device has been unplugged, ...). When such an error
 * condition happens, the page_fault() callback must return VM_FAULT_SIGBUS and
 * set the CPU page table entry to "poisoned".
 *
 * Note that because memory cgroup charges are transferred to the device
 * memory, this should never fail due to memory restrictions. However,
 * allocation of a regular system page might still fail because we are out of
 * memory. If that happens, the page_fault() callback must return VM_FAULT_OOM.
 *
 * The page_fault() callback can also try to migrate back multiple pages in one
 * chunk, as an optimization. It must, however, prioritize the faulting address
 * over all the others.
 *
 * The page_free() callback is called once the page refcount reaches 1
 * (ZONE_DEVICE pages never reach a refcount of 0 unless there is a refcount
 * bug); this allows the device driver to implement its own memory management.
 */
typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
                                unsigned long addr,
                                struct page *page,
                                unsigned int flags,
                                pmd_t *pmdp);
typedef void (*dev_page_free_t)(struct page *page, void *data);

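/*
 * Illustrative sketch of the callback contract above (hypothetical driver
 * code, not an in-tree implementation): my_migrate_to_ram() and
 * my_free_to_device_pool() are made-up helpers standing in for whatever
 * migration and allocator logic a real driver would have.
 *
 *      static int my_page_fault(struct vm_area_struct *vma, unsigned long addr,
 *                               struct page *page, unsigned int flags,
 *                               pmd_t *pmdp)
 *      {
 *              int ret = my_migrate_to_ram(vma, addr, page, pmdp);
 *
 *              if (ret == -ENOMEM)
 *                      return VM_FAULT_OOM;    // system page allocation failed
 *              if (ret)
 *                      return VM_FAULT_SIGBUS; // device error, entry is poisoned
 *              return 0;                       // CPU can now access the page
 *      }
 *
 *      static void my_page_free(struct page *page, void *data)
 *      {
 *              // refcount dropped to 1: hand the page back to the driver
 *              my_free_to_device_pool(data, page);
 *      }
 */
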
/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
 * @altmap: pre-allocated/reserved memory for vmemmap allocations
 * @page_fault: callback invoked when the CPU faults on an unaddressable device page
 * @page_free: free page callback invoked when the page refcount reaches 1
 * @res: physical address range covered by @ref
 * @ref: reference count that pins the devm_memremap_pages() mapping
 * @dev: host device of the mapping for debug
 * @data: private data pointer for page_free
 * @type: memory type: see MEMORY_* above
 */
struct dev_pagemap {
        struct vmem_altmap *altmap;
        dev_page_fault_t page_fault;
        dev_page_free_t page_free;
        const struct resource *res;
        struct percpu_ref *ref;
        struct device *dev;
        void *data;
        enum memory_type type;
};

#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res,
                struct percpu_ref *ref, struct vmem_altmap *altmap);
struct dev_pagemap *find_dev_pagemap(resource_size_t phys);

static inline bool is_hmm_page(const struct page *page)
{
        /* See MEMORY_HMM in include/linux/memory_hotplug.h */
        return ((page_zonenum(page) == ZONE_DEVICE) &&
                (page->pgmap->type == MEMORY_HMM));
}
#else
static inline void *devm_memremap_pages(struct device *dev,
                struct resource *res, struct percpu_ref *ref,
                struct vmem_altmap *altmap)
{
        /*
         * Fail attempts to call devm_memremap_pages() without
         * ZONE_DEVICE support enabled; callers must fall back to
         * plain devm_memremap() based on config.
         */
        WARN_ON_ONCE(1);
        return ERR_PTR(-ENXIO);
}

static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
{
        return NULL;
}

static inline bool is_hmm_page(const struct page *page)
{
        return false;
}
#endif
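
/*
 * Illustrative call sequence (hypothetical driver code): devm_memremap_pages()
 * arranges struct pages for the physical range in @res and keeps the mapping
 * alive for as long as @ref stays live.  The release callback and variable
 * names below are assumptions made for this sketch only.
 *
 *      static void my_pgmap_release(struct percpu_ref *ref)
 *      {
 *              // last reference dropped: the mapping may now be torn down
 *      }
 *
 *      if (percpu_ref_init(&my_ref, my_pgmap_release, 0, GFP_KERNEL))
 *              return -ENOMEM;
 *      addr = devm_memremap_pages(dev, res, &my_ref, NULL);
 *      if (IS_ERR(addr))
 *              return PTR_ERR(addr);
 *      // addr is a linear mapping of res->start..res->end backed by
 *      // ZONE_DEVICE struct pages
 */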

/**
 * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
 * @pfn: page frame number used to look up the dev_pagemap
 * @pgmap: optional known pgmap that already has a reference
 *
 * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
 * same mapping.
 */
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
                struct dev_pagemap *pgmap)
{
        const struct resource *res = pgmap ? pgmap->res : NULL;
        resource_size_t phys = PFN_PHYS(pfn);

        /*
         * In the cached case we're already holding a live reference so
         * we can simply do a blind increment.
         */
        if (res && phys >= res->start && phys <= res->end) {
                percpu_ref_get(pgmap->ref);
                return pgmap;
        }

        /* fall back to slow path lookup */
        rcu_read_lock();
        pgmap = find_dev_pagemap(phys);
        if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
                pgmap = NULL;
        rcu_read_unlock();

        return pgmap;
}

static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
        if (pgmap) {
                WARN_ON(percpu_ref_is_zero(pgmap->ref));
                percpu_ref_put(pgmap->ref);
        }
}
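
/*
 * Illustrative usage (hypothetical caller): a one-off lookup takes and drops
 * a reference around the access.  A caller that already holds a reference on
 * a pgmap (for example from a previous lookup in the same pfn walk that it
 * has not yet put) may pass it as the hint argument so that pfns falling in
 * the same mapping skip the find_dev_pagemap() lookup.
 *
 *      struct dev_pagemap *pgmap;
 *
 *      pgmap = get_dev_pagemap(pfn, NULL);
 *      if (pgmap) {
 *              // pfn is device memory and its mapping is pinned; use it
 *              put_dev_pagemap(pgmap);
 *      }
 */
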
#endif /* _LINUX_MEMREMAP_H_ */