#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
#include <linux/mm.h>
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>

struct resource;
struct device;

/**
 * struct vmem_altmap - pre-allocated storage for vmemmap_populate
 * @base_pfn: base of the entire dev_pagemap mapping
 * @reserve: pages mapped, but reserved for driver use (relative to @base_pfn)
 * @free: free pages set aside in the mapping for memmap storage
 * @align: pages reserved to meet allocation alignments
 * @alloc: track pages consumed, private to vmemmap_populate()
 */
struct vmem_altmap {
	const unsigned long base_pfn;
	const unsigned long reserve;
	unsigned long free;
	unsigned long align;
	unsigned long alloc;
};

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);

#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
	return NULL;
}
#endif

/*
 * Specialize ZONE_DEVICE memory into multiple types, each having a different
 * usage.
 *
 * MEMORY_DEVICE_PUBLIC:
 * Persistent device memory (pmem): struct page might be allocated in different
 * memory and the architecture might want to perform special actions. It is
 * similar to regular memory, in that the CPU can access it transparently.
 * However, it is likely to have different bandwidth and latency than regular
 * memory. See Documentation/nvdimm/nvdimm.txt for more information.
 *
 * MEMORY_HMM:
 * Device memory that is not directly addressable by the CPU: the CPU can
 * neither read nor write such memory. In this case, we do still have struct
 * pages backing the device memory. Doing so simplifies the implementation, but
 * it is important to remember that there are certain points at which the
 * struct page must be treated as an opaque object, rather than a "normal"
 * struct page.
 * A more complete discussion of unaddressable memory may be found in
 * include/linux/hmm.h and Documentation/vm/hmm.txt.
 */
enum memory_type {
	MEMORY_DEVICE_PUBLIC = 0,
	MEMORY_HMM,
};

/*
 * For MEMORY_HMM we use ZONE_DEVICE and extend it with two callbacks:
 *   page_fault()
 *   page_free()
 *
 * Additional notes about MEMORY_HMM may be found in include/linux/hmm.h and
 * Documentation/vm/hmm.txt. There is also a brief explanation in
 * include/linux/memory_hotplug.h.
 *
 * The page_fault() callback must migrate the page back, from device memory to
 * system memory, so that the CPU can access it. This might fail for various
 * reasons (device issues, device has been unplugged, ...). When such an error
 * condition happens, the page_fault() callback must return VM_FAULT_SIGBUS and
 * set the CPU page table entry to "poisoned".
 *
 * Note that because memory cgroup charges are transferred to the device
 * memory, this should never fail due to memory restrictions. However,
 * allocation of a regular system page might still fail because we are out of
 * memory. If that happens, the page_fault() callback must return VM_FAULT_OOM.
 *
 * The page_fault() callback can also try to migrate back multiple pages in one
 * chunk, as an optimization. It must, however, prioritize the faulting address
 * over all the others.
 *
 * The page_free() callback is called once the page refcount reaches 1
 * (ZONE_DEVICE pages never reach a refcount of 0 unless there is a refcount
 * bug). This allows the device driver to implement its own memory management.
 */
typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
				unsigned long addr,
				struct page *page,
				unsigned int flags,
				pmd_t *pmdp);
typedef void (*dev_page_free_t)(struct page *page, void *data);
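/*
 * Below is a minimal, illustrative sketch of the two callbacks, showing the
 * return-value contract described above. The helpers my_migrate_to_ram() and
 * my_free_list_add(), and the struct my_device type, are hypothetical
 * placeholders, not part of this API.
 */
#if 0	/* example only, never compiled */
static int my_dev_page_fault(struct vm_area_struct *vma,
			     unsigned long addr,
			     struct page *page,
			     unsigned int flags,
			     pmd_t *pmdp)
{
	int ret;

	/* Migrate the faulting page back to system memory. */
	ret = my_migrate_to_ram(vma, addr, page);
	if (ret == -ENOMEM)
		return VM_FAULT_OOM;	/* no system page available */
	if (ret)
		return VM_FAULT_SIGBUS;	/* device error: entry gets poisoned */
	return 0;
}

static void my_dev_page_free(struct page *page, void *data)
{
	struct my_device *mdev = data;

	/* Refcount reached 1: the backing device page is free for reuse. */
	my_free_list_add(mdev, page);
}
#endif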
/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
 * @altmap: pre-allocated/reserved memory for vmemmap allocations
 * @page_fault: callback when the CPU faults on an unaddressable device page
 * @page_free: free page callback when page refcount reaches 1
 * @res: physical address range covered by @ref
 * @ref: reference count that pins the devm_memremap_pages() mapping
 * @dev: host device of the mapping for debug
 * @data: private data pointer for page_free()
 * @type: memory type: see MEMORY_* above
 */
struct dev_pagemap {
	struct vmem_altmap *altmap;
	dev_page_fault_t page_fault;
	dev_page_free_t page_free;
	const struct resource *res;
	struct percpu_ref *ref;
	struct device *dev;
	void *data;
	enum memory_type type;
};

#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res,
		struct percpu_ref *ref, struct vmem_altmap *altmap);
struct dev_pagemap *find_dev_pagemap(resource_size_t phys);

static inline bool is_hmm_page(const struct page *page)
{
	/* See MEMORY_HMM in include/linux/memory_hotplug.h */
	return ((page_zonenum(page) == ZONE_DEVICE) &&
		(page->pgmap->type == MEMORY_HMM));
}
#else
static inline void *devm_memremap_pages(struct device *dev,
		struct resource *res, struct percpu_ref *ref,
		struct vmem_altmap *altmap)
{
	/*
	 * Fail attempts to call devm_memremap_pages() without
	 * ZONE_DEVICE support enabled; this requires callers to fall
	 * back to plain devm_memremap() based on config.
	 */
	WARN_ON_ONCE(1);
	return ERR_PTR(-ENXIO);
}

static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
{
	return NULL;
}

static inline bool is_hmm_page(const struct page *page)
{
	return false;
}
#endif

/**
 * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
 * @pfn: page frame number to look up a page_map for
 * @pgmap: optional known pgmap that already has a reference
 *
 * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
 * same mapping.
 */
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap)
{
	const struct resource *res = pgmap ? pgmap->res : NULL;
	resource_size_t phys = PFN_PHYS(pfn);

	/*
	 * In the cached case we're already holding a live reference so
	 * we can simply do a blind increment.
	 */
	if (res && phys >= res->start && phys <= res->end) {
		percpu_ref_get(pgmap->ref);
		return pgmap;
	}

	/* fall back to slow path lookup */
	rcu_read_lock();
	pgmap = find_dev_pagemap(phys);
	if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
		pgmap = NULL;
	rcu_read_unlock();

	return pgmap;
}

static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
	if (pgmap) {
		WARN_ON(percpu_ref_is_zero(pgmap->ref));
		percpu_ref_put(pgmap->ref);
	}
}
#endif /* _LINUX_MEMREMAP_H_ */
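/*
 * A sketch of the intended get_dev_pagemap() / put_dev_pagemap() pairing when
 * walking a range of device PFNs: the pgmap returned by one call is passed
 * back in as the cache hint for the next, so only the first lookup takes the
 * slow path. walk_device_pfns() is a hypothetical caller; the sketch assumes
 * the range is backed by ZONE_DEVICE pages and that the caller's context
 * otherwise keeps the mapping from being torn down mid-walk.
 */
#if 0	/* example only, never compiled */
static int walk_device_pfns(unsigned long pfn, unsigned long nr_pfns)
{
	struct dev_pagemap *pgmap = NULL;
	unsigned long i;

	for (i = 0; i < nr_pfns; i++) {
		/* Cached hit is a blind percpu_ref_get(); miss does a lookup. */
		pgmap = get_dev_pagemap(pfn + i, pgmap);
		if (!pgmap)
			return -ENXIO;	/* mapping is being torn down */

		/* ... operate on pfn_to_page(pfn + i) here ... */

		/* Every successful get is paired with a put. */
		put_dev_pagemap(pgmap);
	}
	return 0;
}
#endif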