linux/include/linux/mmu_notifier.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/types.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

/* mmu_notifier_ops flags */
#define MMU_INVALIDATE_DOES_NOT_BLOCK   (0x01)

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
        /* all mmu notifiers registered in this mm are queued in this list */
        struct hlist_head list;
        /* to serialize the list modifications and hlist_unhashed */
        spinlock_t lock;
};

struct mmu_notifier_ops {
        /*
         * Flags to specify behavior of callbacks for this MMU notifier.
         * Used to determine in which context an operation may be called.
         *
         * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_range_* callbacks do not
         *      block
         */
        int flags;

        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
         * methods (the ones invoked outside the mm context) and it
         * should tear down all secondary mmu mappings and freeze the
         * secondary mmu. If this method isn't implemented you have to
         * be sure that nothing could possibly write to the pages
         * through the secondary mmu by the time the last thread with
         * tsk->mm == mm exits.
         *
         * As a side note: the pages freed after ->release returns could
         * be immediately reallocated by the gart at an alias physical
         * address with a different cache model, so if ->release isn't
         * implemented because all _software_ driven memory accesses
         * through the secondary mmu are terminated by the time the
         * last thread of this mm quits, you also have to be sure that
         * speculative _hardware_ operations can't allocate dirty
         * cachelines in the cpu that could not be snooped and made
         * coherent with the other read and write operations happening
         * through the gart alias address, thus leading to memory
         * corruption.
         */
        void (*release)(struct mmu_notifier *mn,
                        struct mm_struct *mm);

        /*
         * clear_flush_young is called after the VM test-and-clears the
         * young/accessed bitflag in the pte. This way the VM will
         * provide proper aging for accesses to the page through the
         * secondary MMUs and not only for the ones through the Linux pte.
         * Start-end is necessary in case the secondary MMU is mapping the page
         * at a smaller granularity than the primary MMU.
         */
        int (*clear_flush_young)(struct mmu_notifier *mn,
                                 struct mm_struct *mm,
                                 unsigned long start,
                                 unsigned long end);

        /*
         * clear_young is a lightweight version of clear_flush_young. Like the
         * latter, it is supposed to test-and-clear the young/accessed bitflag
         * in the secondary pte, but it may omit flushing the secondary tlb.
         */
        int (*clear_young)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long start,
                           unsigned long end);

        /*
         * test_young is called to check the young/accessed bitflag in
         * the secondary pte. This is used to know if the page is
         * frequently used without actually clearing the flag or tearing
         * down the secondary mapping on the page.
         */
        int (*test_young)(struct mmu_notifier *mn,
                          struct mm_struct *mm,
                          unsigned long address);

        /*
         * change_pte is called in cases where a pte mapping to a page is
         * changed: for example, when ksm remaps a pte to point to a new
         * shared page.
         */
        void (*change_pte)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long address,
                           pte_t pte);

        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only when the mmap_sem and/or the
         * locks protecting the reverse maps are held. If the subsystem
         * can't guarantee that no additional references are taken to
         * the pages in the range, it has to implement the
         * invalidate_range() notifier to remove any references taken
         * after invalidate_range_start().
         *
         * Invalidation of multiple concurrent ranges may be
         * optionally permitted by the driver. Either way the
         * establishment of sptes is forbidden in the range passed to
         * invalidate_range_start/end for the whole duration of the
         * invalidate_range_start/end critical section.
         *
         * invalidate_range_start() is called when all pages in the
         * range are still mapped and have at least a refcount of one.
         *
         * invalidate_range_end() is called when all pages in the
         * range have been unmapped and the pages have been freed by
         * the VM.
         *
         * The VM will remove the page table entries and potentially
         * the page between invalidate_range_start() and
         * invalidate_range_end(). If the page must not be freed
         * because of pending I/O or other circumstances then the
         * invalidate_range_start() callback (or the initial mapping
         * by the driver) must make sure that the refcount is kept
         * elevated.
         *
         * If the driver increases the refcount when the pages are
         * initially mapped into an address space then either
         * invalidate_range_start() or invalidate_range_end() may
         * decrease the refcount. If the refcount is decreased on
         * invalidate_range_start() then the VM can free pages as page
         * table entries are removed.  If the refcount is only
         * dropped on invalidate_range_end() then the driver itself
         * will drop the last refcount but it must take care to flush
         * any secondary tlb before doing the final free on the
         * page. Pages will no longer be referenced by the linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
         *
         * If both of these callbacks cannot block, and invalidate_range
         * cannot block, mmu_notifier_ops.flags should have
         * MMU_INVALIDATE_DOES_NOT_BLOCK set.
         */
        void (*invalidate_range_start)(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start, unsigned long end);
        void (*invalidate_range_end)(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end);

        /*
         * invalidate_range() is either called between
         * invalidate_range_start() and invalidate_range_end() when the
         * VM has to free pages that were unmapped, but before the
         * pages are actually freed, or outside of _start()/_end() when
         * a (remote) TLB flush is necessary.
         *
         * If invalidate_range() is used to manage a non-CPU TLB with
         * shared page-tables, it is not necessary to implement the
         * invalidate_range_start()/end() notifiers, as
         * invalidate_range() already catches the points in time when an
         * external TLB range needs to be flushed. For a more in-depth
         * discussion on this see Documentation/vm/mmu_notifier.rst
         *
         * Note that this function might be called with just a sub-range
         * of what was passed to invalidate_range_start()/end(), if
         * called between those functions.
         *
         * If this callback cannot block, and invalidate_range_{start,end}
         * cannot block, mmu_notifier_ops.flags should have
         * MMU_INVALIDATE_DOES_NOT_BLOCK set.
         */
        void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
                                 unsigned long start, unsigned long end);
};
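
/*
 * Illustrative sketch (not part of this header): a hypothetical driver whose
 * device walks the CPU page tables directly (the shared page-tables case
 * described above) could get away with only ->invalidate_range and ->release.
 * The names my_dev, my_dev_stop_dma() and my_dev_flush_iotlb() are made up
 * for the example; only set MMU_INVALIDATE_DOES_NOT_BLOCK if the callbacks
 * really never sleep.
 *
 *	static void my_release(struct mmu_notifier *mn, struct mm_struct *mm)
 *	{
 *		// Last chance to stop the device before the pages go away.
 *		my_dev_stop_dma(container_of(mn, struct my_dev, mn));
 *	}
 *
 *	static void my_invalidate_range(struct mmu_notifier *mn,
 *					struct mm_struct *mm,
 *					unsigned long start, unsigned long end)
 *	{
 *		// The CPU ptes for [start, end) changed; shoot down the
 *		// device TLB so it cannot keep using stale translations.
 *		my_dev_flush_iotlb(container_of(mn, struct my_dev, mn),
 *				   start, end);
 *	}
 *
 *	static const struct mmu_notifier_ops my_notifier_ops = {
 *		.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
 *		.release		= my_release,
 *		.invalidate_range	= my_invalidate_range,
 *	};
 */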

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
                                               struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end);
extern int __mmu_notifier_clear_young(struct mm_struct *mm,
                                      unsigned long start,
                                      unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
                                     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end,
                                  bool only_end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm);
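
/*
 * Illustrative sketch (not part of this header): typical registration from a
 * hypothetical driver.  The embedding structure my_dev and my_notifier_ops
 * are made up; the driver must keep my_dev->mn alive until after
 * mmu_notifier_unregister() has returned (see also
 * mmu_notifier_unregister_no_release() and mmu_notifier_call_srcu()).
 *
 *	struct my_dev {
 *		struct mmu_notifier mn;
 *		// ... device state ...
 *	};
 *
 *	static int my_dev_bind_mm(struct my_dev *dev, struct mm_struct *mm)
 *	{
 *		dev->mn.ops = &my_notifier_ops;
 *		// Takes mmap_sem for write internally; callers that already
 *		// hold it for write use __mmu_notifier_register() instead.
 *		return mmu_notifier_register(&dev->mn, mm);
 *	}
 *
 *	static void my_dev_unbind_mm(struct my_dev *dev, struct mm_struct *mm)
 *	{
 *		mmu_notifier_unregister(&dev->mn, mm);
 *	}
 */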

static inline void mmu_notifier_release(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_flush_young(mm, start, end);
        return 0;
}

static inline int mmu_notifier_clear_young(struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_young(mm, start, end);
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_test_young(mm, address);
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_end(mm, start, end, false);
}

static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_end(mm, start, end, true);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range(mm, start, end);
}
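
/*
 * Illustrative sketch (not part of this header): the bracketing pattern core
 * VM code follows around a range unmap.  my_unmap_range() is a placeholder,
 * not a real function.
 *
 *	static void my_unmap_range(struct mm_struct *mm,
 *				   unsigned long start, unsigned long end)
 *	{
 *		mmu_notifier_invalidate_range_start(mm, start, end);
 *		// ... clear the ptes and flush the CPU TLB for the range ...
 *		// If the secondary TLB has already been flushed here (for
 *		// example via ptep_clear_flush_notify()), the cheaper
 *		// mmu_notifier_invalidate_range_only_end() can be used to
 *		// avoid a redundant ->invalidate_range() call.
 *		mmu_notifier_invalidate_range_end(mm, start, end);
 *	}
 */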

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
        mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_mm_destroy(mm);
}

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PAGE_SIZE);     \
        __young;                                                        \
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_clear_flush_young(___vma, ___address, __pmdp);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PMD_SIZE);      \
        __young;                                                        \
})

#define ptep_clear_young_notify(__vma, __address, __ptep)               \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
                                            ___address + PAGE_SIZE);    \
        __young;                                                        \
})

#define pmdp_clear_young_notify(__vma, __address, __pmdp)               \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
                                            ___address + PMD_SIZE);     \
        __young;                                                        \
})
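
/*
 * Illustrative sketch (not part of this header): how reclaim-style aging code
 * might use the *_young_notify() helpers so that accesses through secondary
 * MMUs are counted as well; my_page_is_young() is a made-up wrapper.
 *
 *	static int my_page_is_young(struct vm_area_struct *vma,
 *				    unsigned long address, pte_t *ptep)
 *	{
 *		// Test-and-clears (and flushes) the accessed bit in the
 *		// primary pte, then ORs in the result of
 *		// mmu_notifier_clear_flush_young() for the same page.
 *		return ptep_clear_flush_young_notify(vma, address, ptep);
 *	}
 */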

#define ptep_clear_flush_notify(__vma, __address, __ptep)               \
({                                                                      \
        unsigned long ___addr = __address & PAGE_MASK;                  \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pte_t ___pte;                                                   \
                                                                        \
        ___pte = ptep_clear_flush(__vma, __address, __ptep);            \
        mmu_notifier_invalidate_range(___mm, ___addr,                   \
                                        ___addr + PAGE_SIZE);           \
                                                                        \
        ___pte;                                                         \
})

#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)             \
({                                                                      \
        unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pmd_t ___pmd;                                                   \
                                                                        \
        ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);          \
        mmu_notifier_invalidate_range(___mm, ___haddr,                  \
                                      ___haddr + HPAGE_PMD_SIZE);       \
                                                                        \
        ___pmd;                                                         \
})

#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)             \
({                                                                      \
        unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;              \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pud_t ___pud;                                                   \
                                                                        \
        ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud);          \
        mmu_notifier_invalidate_range(___mm, ___haddr,                  \
                                      ___haddr + HPAGE_PUD_SIZE);       \
                                                                        \
        ___pud;                                                         \
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * It is safe to start by updating the secondary MMUs, because the primary MMU
 * pte invalidate must have already happened with a ptep_clear_flush() before
 * set_pte_at_notify() has been invoked.  Updating the secondary MMUs first is
 * required when we change both the protection of the mapping from read-only to
 * read-write and the pfn (like during copy on write page faults). Otherwise the
 * old page would remain mapped readonly in the secondary MMUs after the new
 * page is already writable by some CPU through the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
({                                                                      \
        struct mm_struct *___mm = __mm;                                 \
        unsigned long ___address = __address;                           \
        pte_t ___pte = __pte;                                           \
                                                                        \
        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
})
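
/*
 * Illustrative sketch (not part of this header): the read-only-to-writable
 * page replacement described above, roughly the shape of copy-on-write style
 * code.  my_replace_page() is a made-up name and the construction of new_pte
 * is elided.
 *
 *	static void my_replace_page(struct vm_area_struct *vma,
 *				    unsigned long address, pte_t *ptep,
 *				    pte_t new_pte)
 *	{
 *		// Invalidate the old pte in the primary MMU and, through
 *		// ptep_clear_flush_notify(), in the secondary TLBs ...
 *		ptep_clear_flush_notify(vma, address, ptep);
 *		// ... then tell the secondary MMUs about the new pte before
 *		// the primary MMU starts using it.
 *		set_pte_at_notify(vma->vm_mm, address, ptep, new_pte);
 *	}
 */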

extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
                                   void (*func)(struct rcu_head *rcu));
extern void mmu_notifier_synchronize(void);

#else /* CONFIG_MMU_NOTIFIER */

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return 0;
}

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
{
        return false;
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */