linux/include/linux/mmu_notifier.h
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and is released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
        /* all mmu notifiers registered in this mm are queued in this list */
        struct hlist_head list;
        /* to serialize modifications to the list and hlist_unhashed checks */
        spinlock_t lock;
};

struct mmu_notifier_ops {
        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
         * methods (the ones invoked outside the mm context) and it
         * should tear down all secondary mmu mappings and freeze the
         * secondary mmu. If this method isn't implemented you have to
         * be sure that nothing could possibly write to the pages
         * through the secondary mmu by the time the last thread with
         * tsk->mm == mm exits.
         *
         * As a side note: the pages freed after ->release returns could
         * be immediately reallocated by the gart at an alias physical
         * address with a different cache model, so if ->release isn't
         * implemented because all _software_ driven memory accesses
         * through the secondary mmu are terminated by the time the
         * last thread of this mm quits, you also have to be sure that
         * speculative _hardware_ operations can't allocate dirty
         * cachelines in the cpu that could not be snooped and made
         * coherent with the other read and write operations happening
         * through the gart alias address, which would lead to memory
         * corruption.
         */
        void (*release)(struct mmu_notifier *mn,
                        struct mm_struct *mm);

        /*
         * clear_flush_young is called after the VM test-and-clears
         * the young/accessed bitflag in the pte. This way the VM
         * provides proper aging for accesses to the page through the
         * secondary MMUs and not only for the ones through the Linux
         * pte.
         */
        int (*clear_flush_young)(struct mmu_notifier *mn,
                                 struct mm_struct *mm,
                                 unsigned long address);

        /*
         * change_pte is called when the pte mapping a page is changed:
         * for example, when ksm remaps a pte to point to a new shared page.
         */
        void (*change_pte)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long address,
                           pte_t pte);

        /*
         * Before this is invoked any secondary MMU is still allowed to
         * read/write to the page previously pointed to by the Linux
         * pte because the page hasn't been freed yet and it won't be
         * freed until this returns. If required, set_page_dirty has to
         * be called from within this method.
         */
        void (*invalidate_page)(struct mmu_notifier *mn,
                                struct mm_struct *mm,
                                unsigned long address);

        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only while the mmap_sem and/or the
         * locks protecting the reverse maps are held. The subsystem
         * must guarantee that no additional references are taken to
         * the pages in the range between the call to
         * invalidate_range_start() and the matching call to
         * invalidate_range_end().
         *
         * Invalidation of multiple concurrent ranges may be
         * optionally permitted by the driver. Either way, the
         * establishment of sptes is forbidden in the range passed to
         * invalidate_range_start/end for the whole duration of the
         * invalidate_range_start/end critical section (see the sketch
         * of a driver implementation after this structure).
         *
         * invalidate_range_start() is called when all pages in the
         * range are still mapped and have at least a refcount of one.
         *
         * invalidate_range_end() is called when all pages in the
         * range have been unmapped and the pages have been freed by
         * the VM.
         *
         * The VM will remove the page table entries and potentially
         * the page between invalidate_range_start() and
         * invalidate_range_end(). If the page must not be freed
         * because of pending I/O or other circumstances then the
         * invalidate_range_start() callback (or the initial mapping
         * by the driver) must make sure that the refcount is kept
         * elevated.
         *
         * If the driver increases the refcount when the pages are
         * initially mapped into an address space then either
         * invalidate_range_start() or invalidate_range_end() may
         * decrease the refcount. If the refcount is decreased on
         * invalidate_range_start() then the VM can free pages as page
         * table entries are removed. If the refcount is only
         * dropped on invalidate_range_end() then the driver itself
         * will drop the last refcount, but it must take care to flush
         * any secondary tlb before doing the final free on the
         * page. Pages will no longer be referenced by the Linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
         */
        void (*invalidate_range_start)(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start, unsigned long end);
        void (*invalidate_range_end)(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end);
};
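
/*
 * Illustrative sketch only (not part of this header, compiled out below):
 * a hypothetical driver could implement the methods above roughly as
 * follows.  The structure "foo_mmu", its spinlock and the helpers
 * foo_zap_sptes(), foo_flush_secondary_tlb() and foo_freeze() are made-up
 * names used to show the expected pairing, not real kernel APIs.  While
 * invalidating_range is nonzero, the driver's own fault path must refuse
 * to establish new sptes in [start, end).
 */
#if 0
struct foo_mmu {
        struct mmu_notifier mn;         /* embedded struct mmu_notifier */
        spinlock_t lock;                /* protects the fields below */
        int invalidating_range;         /* nonzero between start and end */
};

static void foo_invalidate_range_start(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start, unsigned long end)
{
        struct foo_mmu *foo = container_of(mn, struct foo_mmu, mn);

        spin_lock(&foo->lock);
        foo->invalidating_range++;      /* fault path checks this */
        foo_zap_sptes(foo, start, end); /* drop sptes for the range */
        spin_unlock(&foo->lock);
        foo_flush_secondary_tlb(foo);   /* before the VM frees the pages */
}

static void foo_invalidate_range_end(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end)
{
        struct foo_mmu *foo = container_of(mn, struct foo_mmu, mn);

        spin_lock(&foo->lock);
        foo->invalidating_range--;      /* sptes may be established again */
        spin_unlock(&foo->lock);
}

static void foo_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct foo_mmu *foo = container_of(mn, struct foo_mmu, mn);

        foo_freeze(foo);                /* tear down all secondary mappings */
}

static const struct mmu_notifier_ops foo_mmu_notifier_ops = {
        .release                = foo_release,
        .invalidate_range_start = foo_invalidate_range_start,
        .invalidate_range_end   = foo_invalidate_range_end,
};
#endif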

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
 * 3. No other concurrent thread can access the list (release).
 */
struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                            unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
                                           unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);

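/*
 * Illustrative sketch only: a hypothetical driver would typically embed a
 * struct mmu_notifier in its own per-mm state, point ->ops at its method
 * table and then register against the mm it wants to track.  "foo_mmu",
 * "foo_mmu_notifier_ops" and foo_register()/foo_unregister() are made-up
 * names carried over from the sketch above.
 */
#if 0
static int foo_register(struct foo_mmu *foo, struct mm_struct *mm)
{
        foo->mn.ops = &foo_mmu_notifier_ops;
        spin_lock_init(&foo->lock);
        foo->invalidating_range = 0;

        /* hooks foo->mn into the mm's notifier list */
        return mmu_notifier_register(&foo->mn, mm);
}

static void foo_unregister(struct foo_mmu *foo, struct mm_struct *mm)
{
        /* removes foo->mn from the mm's notifier list */
        mmu_notifier_unregister(&foo->mn, mm);
}
#endif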

static inline void mmu_notifier_release(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                                 unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_flush_young(mm, address);
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                                unsigned long address)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_end(mm, start, end);
}
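
/*
 * Illustrative sketch only: the wrappers above are used by the core VM to
 * bracket any operation that tears down ptes for a range.  A condensed,
 * hypothetical caller looks roughly like this; the actual unmapping code
 * lives in mm/ and is elided here.
 */
#if 0
static void example_unmap_range(struct mm_struct *mm,
                                unsigned long start, unsigned long end)
{
        /* sptes may no longer be established in [start, end) from here on */
        mmu_notifier_invalidate_range_start(mm, start, end);

        /* ... clear the Linux ptes and free the pages for the range ... */

        /* secondary MMUs may map the range again after this returns */
        mmu_notifier_invalidate_range_end(mm, start, end);
}
#endif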

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
        mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_mm_destroy(mm);
}

/*
 * These two macros will eventually replace ptep_clear_flush.
 * ptep_clear_flush is implemented as a macro itself, so this also is
 * implemented as a macro until ptep_clear_flush is converted to an
 * inline function, to diminish the risk of compilation failure. The
 * invalidate_page method over time can be moved outside the PT lock
 * and these two macros can be later removed.
 */
#define ptep_clear_flush_notify(__vma, __address, __ptep)               \
({                                                                      \
        pte_t __pte;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __pte = ptep_clear_flush(___vma, ___address, __ptep);           \
        mmu_notifier_invalidate_page(___vma->vm_mm, ___address);        \
        __pte;                                                          \
})

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address);          \
        __young;                                                        \
})

#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
({                                                                      \
        struct mm_struct *___mm = __mm;                                 \
        unsigned long ___address = __address;                           \
        pte_t ___pte = __pte;                                           \
                                                                        \
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
})
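
/*
 * Illustrative sketch only: callers in mm/ substitute the _notify variants
 * for the plain helpers so that secondary MMUs stay coherent.  The function
 * below is a made-up, simplified example of replacing the pte at "address"
 * with "newpte" (loosely modelled on the kind of pte replacement ksm
 * performs); locking and error handling are elided.
 */
#if 0
static void example_replace_pte(struct vm_area_struct *vma,
                                unsigned long address, pte_t *ptep,
                                pte_t newpte)
{
        /* flush the old pte and tell secondary MMUs to drop their spte */
        ptep_clear_flush_notify(vma, address, ptep);

        /* install the new pte and let secondary MMUs map it again */
        set_pte_at_notify(vma->vm_mm, address, ptep, newpte);
}
#endif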

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                                 unsigned long address)
{
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                                unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define ptep_clear_flush_notify ptep_clear_flush
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */