linux/include/linux/mmu_notifier.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
#include <linux/srcu.h>

struct mmu_notifier;
struct mmu_notifier_ops;

/**
 * enum mmu_notifier_event - reason for the mmu notifier callback
 * @MMU_NOTIFY_UNMAP: either a munmap() that unmaps the range or a mremap()
 * that moves the range.
 *
 * @MMU_NOTIFY_CLEAR: clear page table entry (many reasons for this, such as
 * madvise() or replacing a page by another one, ...).
 *
 * @MMU_NOTIFY_PROTECTION_VMA: update is due to a protection change for the
 * range, i.e. using the vma access permission (vm_page_prot) to update the
 * whole range is enough; there is no need to inspect changes to the CPU page
 * table (mprotect() syscall).
 *
 * @MMU_NOTIFY_PROTECTION_PAGE: update is due to a change in the read/write
 * flag for pages in the range, so to mirror those changes the user must
 * inspect the CPU page table (from the end callback).
 *
 * @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still the same page and the
 * same access flags). The user should soft-dirty the page in the end callback
 * to make sure that anyone relying on soft dirtiness catches pages that might
 * be written through non-CPU mappings.
 */
enum mmu_notifier_event {
        MMU_NOTIFY_UNMAP = 0,
        MMU_NOTIFY_CLEAR,
        MMU_NOTIFY_PROTECTION_VMA,
        MMU_NOTIFY_PROTECTION_PAGE,
        MMU_NOTIFY_SOFT_DIRTY,
};

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
        /* all mmu notifiers registered in this mm are queued in this list */
        struct hlist_head list;
        /* to serialize the list modifications and hlist_unhashed */
        spinlock_t lock;
};

#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)

struct mmu_notifier_range {
        struct vm_area_struct *vma;
        struct mm_struct *mm;
        unsigned long start;
        unsigned long end;
        unsigned flags;
        enum mmu_notifier_event event;
};

struct mmu_notifier_ops {
        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
         * methods (the ones invoked outside the mm context) and it
         * should tear down all secondary mmu mappings and freeze the
         * secondary mmu. If this method isn't implemented you have to
         * be sure that nothing could possibly write to the pages
         * through the secondary mmu by the time the last thread with
         * tsk->mm == mm exits.
         *
         * As a side note: the pages freed after ->release returns could
         * be immediately reallocated by the gart at an alias physical
         * address with a different cache model, so if ->release isn't
         * implemented because all _software_ driven memory accesses
         * through the secondary mmu are terminated by the time the
         * last thread of this mm quits, you also have to be sure that
         * speculative _hardware_ operations can't allocate dirty
         * cachelines in the cpu that could not be snooped and made
         * coherent with the other read and write operations happening
         * through the gart alias address, thus leading to memory
         * corruption.
         */
        void (*release)(struct mmu_notifier *mn,
                        struct mm_struct *mm);

        /*
         * clear_flush_young is called after the VM test-and-clears
         * the young/accessed bitflag in the pte. This way the VM
         * provides proper aging for accesses to the page through the
         * secondary MMUs and not only for the ones through the Linux
         * pte.
         * Start-end is necessary in case the secondary MMU is mapping the page
         * at a smaller granularity than the primary MMU.
         */
        int (*clear_flush_young)(struct mmu_notifier *mn,
                                 struct mm_struct *mm,
                                 unsigned long start,
                                 unsigned long end);

        /*
         * clear_young is a lightweight version of clear_flush_young. Like the
         * latter, it is supposed to test-and-clear the young/accessed bitflag
         * in the secondary pte, but it may omit flushing the secondary tlb.
         */
        int (*clear_young)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long start,
                           unsigned long end);

        /*
         * test_young is called to check the young/accessed bitflag in
         * the secondary pte. This is used to know if the page is
         * frequently used without actually clearing the flag or tearing
         * down the secondary mapping on the page.
         */
        int (*test_young)(struct mmu_notifier *mn,
                          struct mm_struct *mm,
                          unsigned long address);

        /*
         * change_pte is called in cases where the pte mapping to a page is
         * changed: for example, when ksm remaps a pte to point to a new
         * shared page.
         */
        void (*change_pte)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long address,
                           pte_t pte);

        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only when the mmap_sem and/or the
         * locks protecting the reverse maps are held. If the subsystem
         * can't guarantee that no additional references are taken to
         * the pages in the range, it has to implement the
         * invalidate_range() notifier to remove any references taken
         * after invalidate_range_start().
         *
         * Invalidation of multiple concurrent ranges may be
         * optionally permitted by the driver. Either way the
         * establishment of sptes is forbidden in the range passed to
         * invalidate_range_start/end for the whole duration of the
         * invalidate_range_start/end critical section.
         *
         * invalidate_range_start() is called when all pages in the
         * range are still mapped and have at least a refcount of one.
         *
         * invalidate_range_end() is called when all pages in the
         * range have been unmapped and the pages have been freed by
         * the VM.
         *
         * The VM will remove the page table entries and potentially
         * the page between invalidate_range_start() and
         * invalidate_range_end(). If the page must not be freed
         * because of pending I/O or other circumstances then the
         * invalidate_range_start() callback (or the initial mapping
         * by the driver) must make sure that the refcount is kept
         * elevated.
         *
         * If the driver increases the refcount when the pages are
         * initially mapped into an address space then either
         * invalidate_range_start() or invalidate_range_end() may
         * decrease the refcount. If the refcount is decreased on
         * invalidate_range_start() then the VM can free pages as page
         * table entries are removed.  If the refcount is only
         * dropped on invalidate_range_end() then the driver itself
         * will drop the last refcount but it must take care to flush
         * any secondary tlb before doing the final free on the
         * page. Pages will no longer be referenced by the linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
         *
         * If the blockable argument is set to false then the callback
         * cannot sleep and has to return with -EAGAIN; 0 should be
         * returned otherwise. Please note that if invalidate_range_start
         * approves a non-blocking behavior then the same applies to
         * invalidate_range_end.
         */
        int (*invalidate_range_start)(struct mmu_notifier *mn,
                                      const struct mmu_notifier_range *range);
        void (*invalidate_range_end)(struct mmu_notifier *mn,
                                     const struct mmu_notifier_range *range);

        /*
         * invalidate_range() is either called between
         * invalidate_range_start() and invalidate_range_end() when the
         * VM has to free pages that were unmapped, but before the
         * pages are actually freed, or outside of _start()/_end() when
         * a (remote) TLB flush is necessary.
         *
         * If invalidate_range() is used to manage a non-CPU TLB with
         * shared page-tables, it is not necessary to implement the
         * invalidate_range_start()/end() notifiers, as
         * invalidate_range() already catches the points in time when an
         * external TLB range needs to be flushed. For a more in-depth
         * discussion on this see Documentation/vm/mmu_notifier.rst
         *
         * Note that this function might be called with just a sub-range
         * of what was passed to invalidate_range_start()/end(), if
         * called between those functions.
         */
        void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
                                 unsigned long start, unsigned long end);
};
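
/*
 * Illustrative sketch, not part of this header: a hypothetical driver that
 * mirrors CPU page tables into a device MMU could wire up the callbacks
 * above roughly as below.  The my_mirror type and the my_mirror_zap() and
 * my_mirror_stop() helpers are made up for the example; only the callback
 * signatures come from struct mmu_notifier_ops.
 *
 *	struct my_mirror {
 *		struct mmu_notifier mn;
 *		spinlock_t lock;
 *	};
 *
 *	static int my_invalidate_range_start(struct mmu_notifier *mn,
 *				const struct mmu_notifier_range *range)
 *	{
 *		struct my_mirror *m = container_of(mn, struct my_mirror, mn);
 *
 *		spin_lock(&m->lock);
 *		my_mirror_zap(m, range->start, range->end);
 *		spin_unlock(&m->lock);
 *		return 0;
 *	}
 *
 *	static void my_release(struct mmu_notifier *mn, struct mm_struct *mm)
 *	{
 *		my_mirror_stop(container_of(mn, struct my_mirror, mn));
 *	}
 *
 *	static const struct mmu_notifier_ops my_mirror_ops = {
 *		.invalidate_range_start	= my_invalidate_range_start,
 *		.release		= my_release,
 *	};
 */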

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
                                               struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end);
extern int __mmu_notifier_clear_young(struct mm_struct *mm,
                                      unsigned long start,
                                      unsigned long end);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
                                     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                      unsigned long address, pte_t pte);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r,
                                  bool only_end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range);

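/*
 * Illustrative sketch, not part of this header: attaching and detaching the
 * example notifier from above.  my_mirror/my_mirror_ops are the made-up
 * names used earlier and error handling is elided.  mmu_notifier_register()
 * is expected to take mmap_sem itself, while __mmu_notifier_register() is
 * for callers that already hold it for write.
 *
 *	static int my_mirror_attach(struct my_mirror *m)
 *	{
 *		spin_lock_init(&m->lock);
 *		m->mn.ops = &my_mirror_ops;
 *		return mmu_notifier_register(&m->mn, current->mm);
 *	}
 *
 *	static void my_mirror_detach(struct my_mirror *m)
 *	{
 *		mmu_notifier_unregister(&m->mn, current->mm);
 *	}
 */
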
static inline bool
mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
{
        return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE);
}
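
/*
 * Illustrative sketch, not part of this header: an invalidate_range_start()
 * implementation that may need to sleep should check the flag above and
 * back off when sleeping is not allowed.  my_mirror_zap_sync() is a made-up
 * helper that can block.
 *
 *	static int my_invalidate_range_start(struct mmu_notifier *mn,
 *				const struct mmu_notifier_range *range)
 *	{
 *		if (!mmu_notifier_range_blockable(range))
 *			return -EAGAIN;
 *		my_mirror_zap_sync(container_of(mn, struct my_mirror, mn),
 *				   range->start, range->end);
 *		return 0;
 *	}
 */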

static inline void mmu_notifier_release(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_flush_young(mm, start, end);
        return 0;
}

static inline int mmu_notifier_clear_young(struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_young(mm, start, end);
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_test_young(mm, address);
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_change_pte(mm, address, pte);
}

static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
        if (mm_has_notifiers(range->mm)) {
                range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE;
                __mmu_notifier_invalidate_range_start(range);
        }
}

static inline int
mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
{
        if (mm_has_notifiers(range->mm)) {
                range->flags &= ~MMU_NOTIFIER_RANGE_BLOCKABLE;
                return __mmu_notifier_invalidate_range_start(range);
        }
        return 0;
}

static inline void
mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{
        if (mm_has_notifiers(range->mm))
                __mmu_notifier_invalidate_range_end(range, false);
}

static inline void
mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range)
{
        if (mm_has_notifiers(range->mm))
                __mmu_notifier_invalidate_range_end(range, true);
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range(mm, start, end);
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
        mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_mm_destroy(mm);
}

static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
                                           enum mmu_notifier_event event,
                                           unsigned flags,
                                           struct vm_area_struct *vma,
                                           struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        range->vma = vma;
        range->event = event;
        range->mm = mm;
        range->start = start;
        range->end = end;
        range->flags = flags;
}
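
/*
 * Illustrative sketch, not part of this header: core mm code brackets page
 * table updates with the helpers above roughly as follows.  zap_my_ptes()
 * is a made-up placeholder for the actual page table manipulation.
 *
 *	struct mmu_notifier_range range;
 *
 *	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
 *				start, end);
 *	mmu_notifier_invalidate_range_start(&range);
 *	zap_my_ptes(vma, start, end);
 *	mmu_notifier_invalidate_range_end(&range);
 */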

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PAGE_SIZE);     \
        __young;                                                        \
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_clear_flush_young(___vma, ___address, __pmdp);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address,           \
                                                  ___address +          \
                                                        PMD_SIZE);      \
        __young;                                                        \
})

#define ptep_clear_young_notify(__vma, __address, __ptep)               \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
                                            ___address + PAGE_SIZE);    \
        __young;                                                        \
})

#define pmdp_clear_young_notify(__vma, __address, __pmdp)               \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
                                            ___address + PMD_SIZE);     \
        __young;                                                        \
})

#define ptep_clear_flush_notify(__vma, __address, __ptep)               \
({                                                                      \
        unsigned long ___addr = __address & PAGE_MASK;                  \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pte_t ___pte;                                                   \
                                                                        \
        ___pte = ptep_clear_flush(__vma, __address, __ptep);            \
        mmu_notifier_invalidate_range(___mm, ___addr,                   \
                                        ___addr + PAGE_SIZE);           \
                                                                        \
        ___pte;                                                         \
})

#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)             \
({                                                                      \
        unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pmd_t ___pmd;                                                   \
                                                                        \
        ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);          \
        mmu_notifier_invalidate_range(___mm, ___haddr,                  \
                                      ___haddr + HPAGE_PMD_SIZE);       \
                                                                        \
        ___pmd;                                                         \
})

#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)             \
({                                                                      \
        unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;              \
        struct mm_struct *___mm = (__vma)->vm_mm;                       \
        pud_t ___pud;                                                   \
                                                                        \
        ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud);          \
        mmu_notifier_invalidate_range(___mm, ___haddr,                  \
                                      ___haddr + HPAGE_PUD_SIZE);       \
                                                                        \
        ___pud;                                                         \
})

/*
 * set_pte_at_notify() sets the pte _after_ running the notifier.
 * This is safe to start by updating the secondary MMUs, because the primary
 * MMU pte invalidate must have already happened with a ptep_clear_flush()
 * before set_pte_at_notify() has been invoked.  Updating the secondary MMUs
 * first is required when we change both the protection of the mapping from
 * read-only to read-write and the pfn (like during copy on write page
 * faults). Otherwise the old page would remain mapped readonly in the
 * secondary MMUs after the new page is already writable by some CPU through
 * the primary MMU.
 */
#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
({                                                                      \
        struct mm_struct *___mm = __mm;                                 \
        unsigned long ___address = __address;                           \
        pte_t ___pte = __pte;                                           \
                                                                        \
        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
})
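
/*
 * Illustrative sketch, not part of this header: replacing a read-only page
 * with a new writable one in the spirit of the copy-on-write fault path.
 * Page table locking, TLB batching and error handling are omitted; new_pte
 * is assumed to already point at the new page with write permission.
 *
 *	ptep_clear_flush_notify(vma, address, ptep);
 *	set_pte_at_notify(mm, address, ptep, new_pte);
 */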

extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
                                   void (*func)(struct rcu_head *rcu));

#else /* CONFIG_MMU_NOTIFIER */

struct mmu_notifier_range {
        unsigned long start;
        unsigned long end;
};

static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range,
                                            unsigned long start,
                                            unsigned long end)
{
        range->start = start;
        range->end = end;
}

#define mmu_notifier_range_init(range,event,flags,vma,mm,start,end)  \
        _mmu_notifier_range_init(range, start, end)

static inline bool
mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
{
        return true;
}

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return 0;
}

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end)
{
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
}

static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
}

static inline int
mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
{
        return 0;
}

static inline
void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{
}

static inline void
mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range)
{
}

static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define mmu_notifier_range_update_to_read_only(r) false

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_young_notify ptep_test_and_clear_young
#define pmdp_clear_young_notify pmdp_test_and_clear_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */