linux/include/linux/mmu_notifier.h
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
        /* all mmu notifiers registered in this mm are queued in this list */
        struct hlist_head list;
        /* to serialize the list modifications and hlist_unhashed */
        spinlock_t lock;
};

struct mmu_notifier_ops {
        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
         * methods (the ones invoked outside the mm context) and it
         * should tear down all secondary mmu mappings and freeze the
         * secondary mmu. If this method isn't implemented you have to
         * be sure that nothing could possibly write to the pages
         * through the secondary mmu by the time the last thread with
         * tsk->mm == mm exits.
         *
         * As a side note: the pages freed after ->release returns could
         * be immediately reallocated by the gart at an alias physical
         * address with a different cache model, so if ->release isn't
         * implemented because all _software_ driven memory accesses
         * through the secondary mmu are terminated by the time the
         * last thread of this mm quits, you also have to be sure that
         * speculative _hardware_ operations can't allocate dirty
         * cachelines in the cpu that could not be snooped and made
         * coherent with the other read and write operations happening
         * through the gart alias address, which would lead to memory
         * corruption.
         */
        void (*release)(struct mmu_notifier *mn,
                        struct mm_struct *mm);

        /*
         * clear_flush_young is called after the VM test-and-clears
         * the young/accessed bitflag in the pte. This way the VM will
         * provide proper aging to the accesses to the page through
         * the secondary MMUs and not only to the ones through the
         * Linux pte.
         * Start-end is necessary in case the secondary MMU is mapping the page
         * at a smaller granularity than the primary MMU.
         */
        int (*clear_flush_young)(struct mmu_notifier *mn,
                                 struct mm_struct *mm,
                                 unsigned long start,
                                 unsigned long end);

        /*
         * test_young is called to check the young/accessed bitflag in
         * the secondary pte. This is used to know if the page is
         * frequently used without actually clearing the flag or tearing
         * down the secondary mapping on the page.
         */
        int (*test_young)(struct mmu_notifier *mn,
                          struct mm_struct *mm,
                          unsigned long address);

        /*
         * change_pte is called when the pte mapping of a page is
         * changed: for example, when ksm remaps a pte to point to a
         * new shared page.
         */
        void (*change_pte)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long address,
                           pte_t pte);

        /*
         * Before this is invoked any secondary MMU is still ok to
         * read/write to the page previously pointed to by the Linux
         * pte because the page hasn't been freed yet and it won't be
         * freed until this returns. If required, set_page_dirty has to
         * be called internally by this method.
         */
        void (*invalidate_page)(struct mmu_notifier *mn,
                                struct mm_struct *mm,
                                unsigned long address);

        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only when the mmap_sem and/or the
         * locks protecting the reverse maps are held. The subsystem
         * must guarantee that no additional references are taken to
         * the pages in the range established between the call to
         * invalidate_range_start() and the matching call to
         * invalidate_range_end().
         *
         * Invalidation of multiple concurrent ranges may be
         * optionally permitted by the driver. Either way the
         * establishment of sptes is forbidden in the range passed to
         * invalidate_range_start/end for the whole duration of the
         * invalidate_range_start/end critical section.
         *
         * invalidate_range_start() is called when all pages in the
         * range are still mapped and have at least a refcount of one.
         *
         * invalidate_range_end() is called when all pages in the
         * range have been unmapped and the pages have been freed by
         * the VM.
         *
         * The VM will remove the page table entries and potentially
         * the page between invalidate_range_start() and
         * invalidate_range_end(). If the page must not be freed
         * because of pending I/O or other circumstances then the
         * invalidate_range_start() callback (or the initial mapping
         * by the driver) must make sure that the refcount is kept
         * elevated.
         *
         * If the driver increases the refcount when the pages are
         * initially mapped into an address space then either
         * invalidate_range_start() or invalidate_range_end() may
         * decrease the refcount. If the refcount is decreased on
         * invalidate_range_start() then the VM can free pages as page
         * table entries are removed. If the refcount is only
         * dropped on invalidate_range_end() then the driver itself
         * will drop the last refcount but it must take care to flush
         * any secondary tlb before doing the final free on the
         * page. Pages will no longer be referenced by the Linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
         */
        void (*invalidate_range_start)(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start, unsigned long end);
        void (*invalidate_range_end)(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end);
};
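
/*
 * Illustrative sketch (not part of this header): how a hypothetical driver
 * that shadows CPU page tables in a device MMU might fill in a minimal
 * mmu_notifier_ops.  All example_* names, including example_flush_dev_tlb(),
 * are assumptions made purely for illustration.
 */
#if 0
static void example_flush_dev_tlb(struct mmu_notifier *mn,
                                  unsigned long start, unsigned long end);

static void example_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        /* tear down every secondary mapping; the device must stop all access */
        example_flush_dev_tlb(mn, 0, -1UL);
}

static void example_invalidate_range_start(struct mmu_notifier *mn,
                                           struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        /*
         * Drop the sptes covering [start, end) and flush the device TLB.
         * No new sptes may be established in this range until the matching
         * ->invalidate_range_end() runs.
         */
        example_flush_dev_tlb(mn, start, end);
}

static const struct mmu_notifier_ops example_mmu_notifier_ops = {
        .release                = example_release,
        .invalidate_range_start = example_invalidate_range_start,
};
#endif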

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
                                               struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
                                     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);

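/*
 * Illustrative sketch (not part of this header): registering the hypothetical
 * notifier sketched above against a task's mm.  mmu_notifier_register() takes
 * mmap_sem internally; the caller only has to keep the mm alive, for example
 * by operating on current->mm or holding an mm_users reference.  The
 * example_* names are assumptions made purely for illustration.
 */
#if 0
static struct mmu_notifier example_mn = {
        .ops = &example_mmu_notifier_ops,
};

static int example_attach(struct mm_struct *mm)
{
        /* hook example_mn into mm->mmu_notifier_mm->list */
        return mmu_notifier_register(&example_mn, mm);
}

static void example_detach(struct mm_struct *mm)
{
        /* unhooks the notifier; ->release runs here if exit_mmap has not already run it */
        mmu_notifier_unregister(&example_mn, mm);
}
#endif
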
static inline void mmu_notifier_release(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_flush_young(mm, start, end);
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_test_young(mm, address);
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_end(mm, start, end);
}

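/*
 * Illustrative sketch (not part of this header): how core VM code brackets a
 * teardown of primary ptes with the range notifiers.  Between the two calls
 * no new sptes may be established for [start, end); the helper name
 * example_zap_range() is an assumption made purely for illustration.
 */
#if 0
static void example_zap_range(struct mm_struct *mm,
                              unsigned long start, unsigned long end)
{
        mmu_notifier_invalidate_range_start(mm, start, end);
        /* ... clear the primary page table entries for [start, end) ... */
        mmu_notifier_invalidate_range_end(mm, start, end);
}
#endif
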
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
        mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_mm_destroy(mm);
}

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PAGE_SIZE);     \
        __young;                                                        \
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_clear_flush_young(___vma, ___address, __pmdp);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PMD_SIZE);      \
        __young;                                                        \
})

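/*
 * Illustrative sketch (not part of this header): reclaim-style aging.  The
 * _notify variant ORs in the accessed state of any secondary-MMU mapping, so
 * a page that is only being used through a device MMU still counts as
 * referenced.  example_page_referenced_one() is a hypothetical name.
 */
#if 0
static int example_page_referenced_one(struct vm_area_struct *vma,
                                       unsigned long address, pte_t *ptep)
{
        /* clears young in the primary pte and in all secondary mappings */
        return ptep_clear_flush_young_notify(vma, address, ptep);
}
#endif
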
/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to start by updating the secondary MMUs, because the primary MMU
 * pte invalidate must have already happened with a ptep_clear_flush() before
 * set_pte_at_notify() is invoked.  Updating the secondary MMUs first is
 * required when we change both the protection of the mapping from read-only to
 * read-write and the pfn (like during copy on write page faults). Otherwise the
 * old page would remain mapped readonly in the secondary MMUs after the new
 * page is already writable by some CPU through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
({                                                                      \
        struct mm_struct *___mm = __mm;                                 \
        unsigned long ___address = __address;                           \
        pte_t ___pte = __pte;                                           \
                                                                        \
        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
})

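/*
 * Illustrative sketch (not part of this header): the copy-on-write ordering
 * described above, heavily simplified from do_wp_page()-style code.  It
 * assumes the usual page table helpers from <asm/pgtable.h>; the function
 * name example_cow_install() is made up for the example.
 */
#if 0
static void example_cow_install(struct vm_area_struct *vma,
                                unsigned long address, pte_t *ptep,
                                struct page *new_page)
{
        pte_t entry;

        /* 1. Invalidate the old read-only pte in the primary MMU. */
        ptep_clear_flush(vma, address, ptep);

        /* 2. Build a writable pte for the freshly copied page. */
        entry = mk_pte(new_page, vma->vm_page_prot);
        entry = pte_mkwrite(pte_mkdirty(entry));

        /*
         * 3. ->change_pte() runs first inside set_pte_at_notify(), so the
         *    secondary MMUs are updated before the primary pte becomes
         *    writable by any CPU.
         */
        set_pte_at_notify(vma->vm_mm, address, ptep, entry);
}
#endif
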
extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
                                   void (*func)(struct rcu_head *rcu));
extern void mmu_notifier_synchronize(void);

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */