linux/include/linux/mmu_notifier.h
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
        /* all mmu notifiers registered in this mm are queued in this list */
        struct hlist_head list;
        /* to serialize the list modifications and hlist_unhashed */
        spinlock_t lock;
};

struct mmu_notifier_ops {
        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
         * methods (the ones invoked outside the mm context) and it
         * should tear down all secondary mmu mappings and freeze the
         * secondary mmu. If this method isn't implemented, you have to
         * be sure that nothing could possibly write to the pages
         * through the secondary mmu by the time the last thread with
         * tsk->mm == mm exits.
         *
         * As a side note: the pages freed after ->release returns could
         * be immediately reallocated by the gart at an alias physical
         * address with a different cache model, so if ->release isn't
         * implemented because all _software_ driven memory accesses
         * through the secondary mmu are terminated by the time the
         * last thread of this mm quits, you also have to be sure that
         * speculative _hardware_ operations can't allocate dirty
         * cachelines in the cpu that could not be snooped and made
         * coherent with the other read and write operations happening
         * through the gart alias address, thus leading to memory
         * corruption.
         */
        void (*release)(struct mmu_notifier *mn,
                        struct mm_struct *mm);

        /*
         * clear_flush_young is called after the VM test-and-clears
         * the young/accessed bitflag in the pte. This way the VM will
         * provide proper aging to the accesses to the page through
         * the secondary MMUs and not only to the ones through the
         * Linux pte.
         */
        int (*clear_flush_young)(struct mmu_notifier *mn,
                                 struct mm_struct *mm,
                                 unsigned long address);

        /*
         * test_young is called to check the young/accessed bitflag in
         * the secondary pte. This is used to know if the page is
         * frequently used without actually clearing the flag or tearing
         * down the secondary mapping on the page.
         */
        int (*test_young)(struct mmu_notifier *mn,
                          struct mm_struct *mm,
                          unsigned long address);

        /*
         * change_pte is called in cases where a pte mapping to a page
         * is changed: for example, when ksm remaps a pte to point to a
         * new shared page.
         */
        void (*change_pte)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long address,
                           pte_t pte);

        /*
         * Before this is invoked, any secondary MMU is still allowed to
         * read/write the page previously pointed to by the Linux pte,
         * because the page hasn't been freed yet and it won't be freed
         * until this returns. If required, set_page_dirty has to be
         * called internally by this method.
         */
        void (*invalidate_page)(struct mmu_notifier *mn,
                                struct mm_struct *mm,
                                unsigned long address);

        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only when the mmap_sem and/or the
         * locks protecting the reverse maps are held. The subsystem
         * must guarantee that no additional references are taken to
         * the pages in the range established between the call to
         * invalidate_range_start() and the matching call to
         * invalidate_range_end().
         *
         * Invalidation of multiple concurrent ranges may be
         * optionally permitted by the driver. Either way the
         * establishment of sptes is forbidden in the range passed to
         * invalidate_range_start/end for the whole duration of the
         * invalidate_range_start/end critical section.
         *
         * invalidate_range_start() is called when all pages in the
         * range are still mapped and have at least a refcount of one.
         *
         * invalidate_range_end() is called when all pages in the
         * range have been unmapped and the pages have been freed by
         * the VM.
         *
         * The VM will remove the page table entries and potentially
         * the page between invalidate_range_start() and
         * invalidate_range_end(). If the page must not be freed
         * because of pending I/O or other circumstances then the
         * invalidate_range_start() callback (or the initial mapping
         * by the driver) must make sure that the refcount is kept
         * elevated.
         *
         * If the driver increases the refcount when the pages are
         * initially mapped into an address space then either
         * invalidate_range_start() or invalidate_range_end() may
         * decrease the refcount. If the refcount is decreased on
         * invalidate_range_start() then the VM can free pages as page
         * table entries are removed.  If the refcount is only
         * dropped on invalidate_range_end() then the driver itself
         * will drop the last refcount but it must take care to flush
         * any secondary tlb before doing the final free on the
         * page. Pages will no longer be referenced by the Linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
         *
         * (A hypothetical driver-side sketch of these callbacks
         * follows this structure definition.)
         */
        void (*invalidate_range_start)(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start, unsigned long end);
        void (*invalidate_range_end)(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end);
};
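
/*
 * Example (editor's illustrative sketch, not part of this header): a
 * minimal secondary-MMU driver wiring up a few of the callbacks above.
 * Every "my_" name (struct my_mmu, my_mmu_unmap_range(), my_mmu_flush_tlb(),
 * my_mmu_ops) is hypothetical, the device-specific helpers are empty stubs,
 * and the usual kernel headers (<linux/kernel.h> for container_of() and
 * ULONG_MAX) are assumed.
 */
struct my_mmu {
        struct mmu_notifier mn;         /* embedded, registered with the mm */
        /* ... device page tables, locks, etc. ... */
};

/* Hypothetical device-specific helpers, stubbed out for illustration. */
static void my_mmu_unmap_range(struct my_mmu *m,
                               unsigned long start, unsigned long end)
{
        /* zap the sptes covering [start, end) in the device page tables */
}

static void my_mmu_flush_tlb(struct my_mmu *m)
{
        /* flush the secondary tlb so the zapped sptes can't be used */
}

static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct my_mmu *m = container_of(mn, struct my_mmu, mn);

        /* Tear down all secondary mappings and freeze the secondary mmu. */
        my_mmu_unmap_range(m, 0, ULONG_MAX);
        my_mmu_flush_tlb(m);
}

static int my_mmu_clear_flush_young(struct mmu_notifier *mn,
                                    struct mm_struct *mm,
                                    unsigned long address)
{
        /* Test-and-clear the accessed bit of the spte at @address. */
        return 0;       /* stub: report "not young" */
}

static void my_mmu_invalidate_range_start(struct mmu_notifier *mn,
                                          struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        struct my_mmu *m = container_of(mn, struct my_mmu, mn);

        /*
         * Drop the sptes covering [start, end) and flush the secondary
         * tlb; no new sptes may be established in this range until the
         * matching invalidate_range_end() has run.
         */
        my_mmu_unmap_range(m, start, end);
        my_mmu_flush_tlb(m);
}

static const struct mmu_notifier_ops my_mmu_ops = {
        .release                = my_mmu_release,
        .clear_flush_young      = my_mmu_clear_flush_young,
        .invalidate_range_start = my_mmu_invalidate_range_start,
};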

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long address);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
                                     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
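
/*
 * Example (editor's sketch): attaching and detaching the hypothetical
 * notifier defined above.  my_mmu_attach()/my_mmu_detach() are made-up
 * names; mmu_notifier_register() takes mmap_sem internally (use
 * __mmu_notifier_register() if mmap_sem is already held for write) and
 * the caller is expected to hold a reference on @mm.
 */
static int my_mmu_attach(struct my_mmu *m, struct mm_struct *mm)
{
        m->mn.ops = &my_mmu_ops;
        return mmu_notifier_register(&m->mn, mm);
}

static void my_mmu_detach(struct my_mmu *m, struct mm_struct *mm)
{
        /*
         * No callback runs on m->mn after this returns; per the
         * ->release documentation above, ->release is invoked either
         * here or from exit_mmap().
         */
        mmu_notifier_unregister(&m->mn, mm);
}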

static inline void mmu_notifier_release(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_flush_young(mm, address);
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_test_young(mm, address);
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_end(mm, start, end);
}
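
/*
 * Example (editor's sketch): how a caller in the core VM is expected to
 * bracket page table teardown with the paired range hooks above.  The
 * actual pte zapping is elided and zap_range_sketch() is a made-up name.
 */
static inline void zap_range_sketch(struct mm_struct *mm,
                                    unsigned long start, unsigned long end)
{
        mmu_notifier_invalidate_range_start(mm, start, end);
        /*
         * ... remove the page table entries covering [start, end) and,
         * once nothing else holds a reference, free the pages ...
         */
        mmu_notifier_invalidate_range_end(mm, start, end);
}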

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
        mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_mm_destroy(mm);
}

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address);          \
        __young;                                                        \
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_clear_flush_young(___vma, ___address, __pmdp);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address);          \
        __young;                                                        \
})
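
/*
 * Example (editor's sketch): an aging check calling the _notify variant
 * so that accesses through secondary MMUs are counted too.  The caller
 * is assumed to have looked up the pte under the page table lock;
 * page_referenced_sketch() is a made-up name.
 */
static inline int page_referenced_sketch(struct vm_area_struct *vma,
                                         unsigned long address, pte_t *ptep)
{
        /* Young in either the Linux pte or any secondary pte. */
        return ptep_clear_flush_young_notify(vma, address, ptep);
}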

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to start by updating the secondary MMUs, because the primary
 * MMU pte invalidate must have already happened with a ptep_clear_flush()
 * before set_pte_at_notify() is invoked.  Updating the secondary MMUs
 * first is required when we change both the protection of the mapping
 * from read-only to read-write and the pfn (like during copy on write
 * page faults). Otherwise the old page would remain mapped readonly in
 * the secondary MMUs after the new page is already writable by some CPU
 * through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
({                                                                      \
        struct mm_struct *___mm = __mm;                                 \
        unsigned long ___address = __address;                           \
        pte_t ___pte = __pte;                                           \
                                                                        \
        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
})
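
/*
 * Example (editor's sketch): the copy-on-write shape described above.
 * The old read-only pte is flushed from the primary MMU first, then the
 * new writable pte is installed with set_pte_at_notify() so the secondary
 * MMUs are pointed at the new page before any CPU can write to it through
 * the primary MMU.  Page allocation, rmap and tlb details are elided;
 * cow_install_new_pte_sketch() is a made-up name.
 */
static inline void cow_install_new_pte_sketch(struct vm_area_struct *vma,
                                              unsigned long address,
                                              pte_t *ptep, pte_t new_pte)
{
        /* Invalidate the primary MMU mapping of the old page. */
        ptep_clear_flush(vma, address, ptep);
        /* Update the secondary MMUs, then set the new pte. */
        set_pte_at_notify(vma->vm_mm, address, ptep, new_pte);
}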

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */