linux/include/linux/mmu_notifier.h
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
        /* all mmu notifiers registered in this mm are queued in this list */
        struct hlist_head list;
        /* to serialize the list modifications and hlist_unhashed */
        spinlock_t lock;
};

struct mmu_notifier_ops {
        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
         * methods (the ones invoked outside the mm context) and it
         * should tear down all secondary mmu mappings and freeze the
         * secondary mmu. If this method isn't implemented you have to
         * be sure that nothing could possibly write to the pages
         * through the secondary mmu by the time the last thread with
         * tsk->mm == mm exits.
         *
         * As a side note: the pages freed after ->release returns could
         * be immediately reallocated by the gart at an alias physical
         * address with a different cache model, so if ->release isn't
         * implemented because all _software_ driven memory accesses
         * through the secondary mmu are terminated by the time the
         * last thread of this mm quits, you also have to be sure that
         * speculative _hardware_ operations can't allocate dirty
         * cachelines in the cpu that could not be snooped and made
         * coherent with the other read and write operations happening
         * through the gart alias address, thus leading to memory
         * corruption.
         */
        void (*release)(struct mmu_notifier *mn,
                        struct mm_struct *mm);

        /*
         * clear_flush_young is called after the VM has test-and-cleared
         * the young/accessed bitflag in the pte. This way the VM
         * provides proper aging for accesses to the page through the
         * secondary MMUs, not only for those through the Linux pte.
         * Start-end is necessary in case the secondary MMU is mapping the page
         * at a smaller granularity than the primary MMU.
         */
        int (*clear_flush_young)(struct mmu_notifier *mn,
                                 struct mm_struct *mm,
                                 unsigned long start,
                                 unsigned long end);

        /*
         * clear_young is a lightweight version of clear_flush_young. Like the
         * latter, it is supposed to test-and-clear the young/accessed bitflag
         * in the secondary pte, but it may omit flushing the secondary tlb.
         */
        int (*clear_young)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long start,
                           unsigned long end);

        /*
         * test_young is called to check the young/accessed bitflag in
         * the secondary pte. This is used to know if the page is
         * frequently used without actually clearing the flag or tearing
         * down the secondary mapping on the page.
         */
        int (*test_young)(struct mmu_notifier *mn,
                          struct mm_struct *mm,
                          unsigned long address);

        /*
         * change_pte is called when a pte's mapping to a page is changed:
         * for example, when ksm remaps a pte to point to a new shared page.
         */
        void (*change_pte)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long address,
                           pte_t pte);

        /*
         * Before this is invoked any secondary MMU is still ok to
         * read/write to the page previously pointed to by the Linux
         * pte because the page hasn't been freed yet and it won't be
         * freed until this returns. If required, set_page_dirty() has
         * to be called internally by this method.
         */
        void (*invalidate_page)(struct mmu_notifier *mn,
                                struct mm_struct *mm,
                                unsigned long address);

        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only when the mmap_sem and/or the
         * locks protecting the reverse maps are held. If the subsystem
         * can't guarantee that no additional references are taken to
         * the pages in the range, it has to implement the
         * invalidate_range() notifier to remove any references taken
         * after invalidate_range_start().
         *
         * Invalidation of multiple concurrent ranges may be
         * optionally permitted by the driver. Either way the
         * establishment of sptes is forbidden in the range passed to
         * invalidate_range_start/end for the whole duration of the
         * invalidate_range_start/end critical section.
         *
         * invalidate_range_start() is called when all pages in the
         * range are still mapped and have at least a refcount of one.
         *
         * invalidate_range_end() is called when all pages in the
         * range have been unmapped and the pages have been freed by
         * the VM.
         *
         * The VM will remove the page table entries and potentially
         * the page between invalidate_range_start() and
         * invalidate_range_end(). If the page must not be freed
         * because of pending I/O or other circumstances then the
         * invalidate_range_start() callback (or the initial mapping
         * by the driver) must make sure that the refcount is kept
         * elevated.
         *
         * If the driver increases the refcount when the pages are
         * initially mapped into an address space then either
         * invalidate_range_start() or invalidate_range_end() may
         * decrease the refcount. If the refcount is decreased on
         * invalidate_range_start() then the VM can free pages as page
         * table entries are removed.  If the refcount is only
         * dropped on invalidate_range_end() then the driver itself
         * will drop the last refcount but it must take care to flush
         * any secondary tlb before doing the final free on the
         * page. Pages will no longer be referenced by the linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
         */
        void (*invalidate_range_start)(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start, unsigned long end);
        void (*invalidate_range_end)(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end);

        /*
         * invalidate_range() is either called between
         * invalidate_range_start() and invalidate_range_end() when the
         * VM has to free pages that were unmapped, but before the
         * pages are actually freed, or outside of _start()/_end() when
         * a (remote) TLB flush is necessary.
         *
         * If invalidate_range() is used to manage a non-CPU TLB with
         * shared page-tables, it is not necessary to implement the
         * invalidate_range_start()/end() notifiers, as
         * invalidate_range() already catches the points in time when an
         * external TLB range needs to be flushed.
         *
         * The invalidate_range() function is called under the ptl
         * spin-lock and is not allowed to sleep.
         *
         * Note that this function might be called with just a sub-range
         * of what was passed to invalidate_range_start()/end(), if
         * called between those functions.
         */
        void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
                                 unsigned long start, unsigned long end);
};
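
/*
 * Example: a minimal sketch of a driver-side implementation of the ops
 * above.  All "mydev_*" names are hypothetical (there is no such
 * in-tree driver), and mydev_flush_dev_tlb()/mydev_stop_dma() stand in
 * for whatever device-specific teardown is needed.  A device sharing
 * the CPU page-tables that only needs its own TLB shot down could, per
 * the comments above, implement just ->invalidate_range() and
 * ->release():
 *
 *	struct mydev {
 *		struct mmu_notifier mn;
 *	};
 *
 *	static void mydev_invalidate_range(struct mmu_notifier *mn,
 *					   struct mm_struct *mm,
 *					   unsigned long start,
 *					   unsigned long end)
 *	{
 *		struct mydev *dev = container_of(mn, struct mydev, mn);
 *
 *		mydev_flush_dev_tlb(dev, start, end);
 *	}
 *
 *	static void mydev_release(struct mmu_notifier *mn,
 *				  struct mm_struct *mm)
 *	{
 *		mydev_stop_dma(container_of(mn, struct mydev, mn));
 *	}
 *
 *	static const struct mmu_notifier_ops mydev_mmu_ops = {
 *		.invalidate_range	= mydev_invalidate_range,
 *		.release		= mydev_release,
 *	};
 */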

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
                                               struct mm_struct *mm);
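
/*
 * Example registration sketch ("mydev" is the hypothetical driver from
 * the example above; the error label is illustrative).
 * mmu_notifier_register() acquires mmap_sem itself, so a caller that
 * already holds mmap_sem for write should use __mmu_notifier_register()
 * instead:
 *
 *	dev->mn.ops = &mydev_mmu_ops;
 *	ret = mmu_notifier_register(&dev->mn, current->mm);
 *	if (ret)
 *		goto err_free;
 *	...
 *	mmu_notifier_unregister(&dev->mn, mm);
 */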
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end);
extern int __mmu_notifier_clear_young(struct mm_struct *mm,
                                      unsigned long start,
                                      unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
                                     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);

static inline void mmu_notifier_release(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_flush_young(mm, start, end);
        return 0;
}

static inline int mmu_notifier_clear_young(struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_young(mm, start, end);
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_test_young(mm, address);
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_end(mm, start, end);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range(mm, start, end);
}
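
/*
 * Sketch of the usual pattern with which a caller brackets an unmap
 * using the inlines above (a distillation of the rules documented in
 * struct mmu_notifier_ops, not a copy of any single call site).  No new
 * sptes may be established for the range until _end() has run:
 *
 *	mmu_notifier_invalidate_range_start(mm, start, end);
 *	... clear the ptes and flush the CPU TLB for [start, end) ...
 *	mmu_notifier_invalidate_range_end(mm, start, end);
 */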

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
        mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_mm_destroy(mm);
}

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PAGE_SIZE);     \
        __young;                                                        \
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_clear_flush_young(___vma, ___address, __pmdp);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PMD_SIZE);      \
        __young;                                                        \
})

#define ptep_clear_young_notify(__vma, __address, __ptep)               \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
                                            ___address + PAGE_SIZE);    \
        __young;                                                        \
})

#define pmdp_clear_young_notify(__vma, __address, __pmdp)               \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
                                            ___address + PMD_SIZE);     \
        __young;                                                        \
})
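
/*
 * Example: page-aging usage sketch.  A reclaim-style walker tests and
 * clears the accessed bit in the Linux pte and in any secondary MMU
 * mapping in one shot; "vma", "addr", "pte" and "referenced" come from
 * the caller's page table walk (compare page_referenced_one() in
 * mm/rmap.c):
 *
 *	if (ptep_clear_flush_young_notify(vma, addr, pte))
 *		referenced++;
 */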

#define ptep_clear_flush_notify(__vma, __address, __ptep)               \
({                                                                      \
        unsigned long ___addr = __address & PAGE_MASK;                  \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pte_t ___pte;                                                   \
                                                                        \
        ___pte = ptep_clear_flush(__vma, __address, __ptep);            \
        mmu_notifier_invalidate_range(___mm, ___addr,                   \
                                        ___addr + PAGE_SIZE);           \
                                                                        \
        ___pte;                                                         \
})

#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)             \
({                                                                      \
        unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pmd_t ___pmd;                                                   \
                                                                        \
        ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);          \
        mmu_notifier_invalidate_range(___mm, ___haddr,                  \
                                      ___haddr + HPAGE_PMD_SIZE);       \
                                                                        \
        ___pmd;                                                         \
})

#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)            \
({                                                                      \
        unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
        pmd_t ___pmd;                                                   \
                                                                        \
        ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);         \
        mmu_notifier_invalidate_range(__mm, ___haddr,                   \
                                      ___haddr + HPAGE_PMD_SIZE);       \
                                                                        \
        ___pmd;                                                         \
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to update the secondary MMUs first because the primary MMU
 * pte invalidate must have already happened with a ptep_clear_flush()
 * before set_pte_at_notify() has been invoked.  Updating the secondary
 * MMUs first is required when we change both the protection of the
 * mapping from read-only to read-write and the pfn (like during copy on
 * write page faults).  Otherwise the old page would remain mapped
 * read-only in the secondary MMUs after the new page is already writable
 * by some CPU through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
({                                                                      \
        struct mm_struct *___mm = __mm;                                 \
        unsigned long ___address = __address;                           \
        pte_t ___pte = __pte;                                           \
                                                                        \
        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
})
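
/*
 * Example: the copy-on-write sequence described above, reduced to the
 * relevant steps of the write-protect fault path (a sketch along the
 * lines of wp_page_copy() in mm/memory.c):
 *
 *	entry = mk_pte(new_page, vma->vm_page_prot);
 *	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 *	ptep_clear_flush_notify(vma, address, page_table);
 *	set_pte_at_notify(mm, address, page_table, entry);
 *
 * The ptep_clear_flush_notify() guarantees no CPU can keep writing the
 * old page through the primary MMU while ->change_pte() switches the
 * secondary MMUs over to the new page.
 */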

extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
                                   void (*func)(struct rcu_head *rcu));
extern void mmu_notifier_synchronize(void);

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */