linux/include/linux/mmu_notifier.h
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _LINUX_MMU_NOTIFIER_H
   3#define _LINUX_MMU_NOTIFIER_H
   4
   5#include <linux/types.h>
   6#include <linux/list.h>
   7#include <linux/spinlock.h>
   8#include <linux/mm_types.h>
   9#include <linux/srcu.h>
  10#include <linux/rh_kabi.h>
  11
  12struct mmu_notifier;
  13struct mmu_notifier_ops;
  14
  15/* mmu_notifier_ops flags */
  16#define MMU_INVALIDATE_DOES_NOT_BLOCK   (0x01)
  17
  18#ifdef CONFIG_MMU_NOTIFIER
  19
  20/*
  21 * The mmu_notifier_mm structure is allocated and installed in
  22 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
  23 * critical section and it's released only when mm_count reaches zero
  24 * in mmdrop().
  25 */
  26struct mmu_notifier_mm {
  27        /* all mmu notifiers registered in this mm are queued in this list */
  28        struct hlist_head list;
  29        /* to serialize the list modifications and hlist_unhashed */
  30        spinlock_t lock;
  31};
  32
  33struct mmu_notifier_ops {
  34        /*
  35         * Flags to specify behavior of callbacks for this MMU notifier.
  36         * Used to determine in which context an operation may be called.
  37         *
  38         * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_range_* callbacks do not
  39         *      block
  40         */
  41        int flags;
  42
  43        /*
  44         * Called either by mmu_notifier_unregister or when the mm is
  45         * being destroyed by exit_mmap, always before all pages are
  46         * freed. This can run concurrently with other mmu notifier
  47         * methods (the ones invoked outside the mm context) and it
  48         * should tear down all secondary mmu mappings and freeze the
  49         * secondary mmu. If this method isn't implemented you have to
  50         * be sure that nothing could possibly write to the pages
  51         * through the secondary mmu by the time the last thread with
  52         * tsk->mm == mm exits.
  53         *
  54         * As a side note: the pages freed after ->release returns could
  55         * be immediately reallocated by the gart at an alias physical
  56         * address with a different cache model, so if ->release isn't
  57         * implemented because all _software_ driven memory accesses
  58         * through the secondary mmu are terminated by the time the
  59         * last thread of this mm quits, you also have to be sure that
  60         * speculative _hardware_ operations can't allocate dirty
  61         * cachelines in the cpu that could not be snooped and made
  62         * coherent with the other read and write operations happening
  63         * through the gart alias address, so leading to memory
  64         * corruption.
  65         */
  66        void (*release)(struct mmu_notifier *mn,
  67                        struct mm_struct *mm);
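
        /*
         * Illustrative sketch only (assumptions, not kernel code): a
         * typical ->release() marks the driver's secondary MMU context
         * dead and tears down every remaining secondary mapping.
         * "struct my_gpu", its embedded "mn" member, the "dead" flag
         * and my_gpu_unmap_all() are hypothetical driver constructs.
         *
         *      static void my_gpu_release(struct mmu_notifier *mn,
         *                                 struct mm_struct *mm)
         *      {
         *              struct my_gpu *gpu = container_of(mn, struct my_gpu, mn);
         *
         *              // forbid new secondary mappings, then drop everything
         *              // still mapped and flush the device TLB
         *              WRITE_ONCE(gpu->dead, true);
         *              my_gpu_unmap_all(gpu);
         *      }
         */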
  68
  69        /*
  70         * clear_flush_young is called after the VM test-and-clears
  71         * the young/accessed bitflag in the pte. This way the VM
  72         * provides proper aging for accesses to the page through the
  73         * secondary MMUs and not only for the ones through the
  74         * Linux pte.
  75         * Start-end is necessary in case the secondary MMU is mapping the page
  76         * at a smaller granularity than the primary MMU.
  77         */
  78        int (*clear_flush_young)(struct mmu_notifier *mn,
  79                                 struct mm_struct *mm,
  80                                 unsigned long start,
  81                                 unsigned long end);
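
        /*
         * Illustrative sketch only (hypothetical my_gpu helpers): a
         * ->clear_flush_young() test-and-clears the accessed bits of the
         * secondary ptes covering [start, end), flushes the secondary
         * TLB if any were set, and reports whether the range was young.
         *
         *      static int my_gpu_clear_flush_young(struct mmu_notifier *mn,
         *                                          struct mm_struct *mm,
         *                                          unsigned long start,
         *                                          unsigned long end)
         *      {
         *              struct my_gpu *gpu = container_of(mn, struct my_gpu, mn);
         *              int young;
         *
         *              young = my_gpu_test_and_clear_young(gpu, start, end);
         *              if (young)
         *                      my_gpu_flush_tlb_range(gpu, start, end);
         *              return young;
         *      }
         */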
  82
  83        /*
  84         * clear_young is a lightweight version of clear_flush_young. Like the
  85         * latter, it is supposed to test-and-clear the young/accessed bitflag
  86         * in the secondary pte, but it may omit flushing the secondary tlb.
  87         */
  88        int (*clear_young)(struct mmu_notifier *mn,
  89                           struct mm_struct *mm,
  90                           unsigned long start,
  91                           unsigned long end);
  92
  93        /*
  94         * test_young is called to check the young/accessed bitflag in
  95         * the secondary pte. This is used to know if the page is
  96         * frequently used without actually clearing the flag or tearing
  97         * down the secondary mapping on the page.
  98         */
  99        int (*test_young)(struct mmu_notifier *mn,
 100                          struct mm_struct *mm,
 101                          unsigned long address);
 102
 103        /*
 104         * change_pte is called when the pte mapping a page is changed:
 105         * for example, when ksm remaps pte to point to a new shared page.
 106         */
 107        void (*change_pte)(struct mmu_notifier *mn,
 108                           struct mm_struct *mm,
 109                           unsigned long address,
 110                           pte_t pte);
 111
 112        /*
 113         * invalidate_range_start() and invalidate_range_end() must be
 114         * paired and are called only when the mmap_sem and/or the
 115         * locks protecting the reverse maps are held. If the subsystem
 116         * can't guarantee that no additional references are taken to
 117         * the pages in the range, it has to implement the
 118         * invalidate_range() notifier to remove any references taken
 119         * after invalidate_range_start().
 120         *
 121         * Invalidation of multiple concurrent ranges may be
 122         * optionally permitted by the driver. Either way the
 123         * establishment of sptes is forbidden in the range passed to
 124         * invalidate_range_start/end for the whole duration of the
 125         * invalidate_range_start/end critical section.
 126         *
 127         * invalidate_range_start() is called when all pages in the
 128         * range are still mapped and have at least a refcount of one.
 129         *
 130         * invalidate_range_end() is called when all pages in the
 131         * range have been unmapped and the pages have been freed by
 132         * the VM.
 133         *
 134         * The VM will remove the page table entries and potentially
 135         * the page between invalidate_range_start() and
 136         * invalidate_range_end(). If the page must not be freed
 137         * because of pending I/O or other circumstances then the
 138         * invalidate_range_start() callback (or the initial mapping
 139         * by the driver) must make sure that the refcount is kept
 140         * elevated.
 141         *
 142         * If the driver increases the refcount when the pages are
 143         * initially mapped into an address space then either
 144         * invalidate_range_start() or invalidate_range_end() may
 145         * decrease the refcount. If the refcount is decreased on
 146         * invalidate_range_start() then the VM can free pages as page
 147         * table entries are removed.  If the refcount is only
 148         * dropped on invalidate_range_end() then the driver itself
 149         * will drop the last refcount but it must take care to flush
 150         * any secondary tlb before doing the final free on the
 151         * page. Pages will no longer be referenced by the linux
 152         * address space but may still be referenced by sptes until
 153         * the last refcount is dropped.
 154         *
 155         * If neither of these callbacks can block, and invalidate_range()
 156         * cannot block either, mmu_notifier_ops.flags should have
 157         * MMU_INVALIDATE_DOES_NOT_BLOCK set (see the sketch after this struct).
 158         */
 159        void (*invalidate_range_start)(struct mmu_notifier *mn,
 160                                       struct mm_struct *mm,
 161                                       unsigned long start, unsigned long end);
 162        void (*invalidate_range_end)(struct mmu_notifier *mn,
 163                                     struct mm_struct *mm,
 164                                     unsigned long start, unsigned long end);
 165
 166        /*
 167         * invalidate_range() is either called between
 168         * invalidate_range_start() and invalidate_range_end() when the
 169         * VM has to free pages that were unmapped, but before the
 170         * pages are actually freed, or outside of _start()/_end() when
 171         * a (remote) TLB flush is necessary.
 172         *
 173         * If invalidate_range() is used to manage a non-CPU TLB with
 174         * shared page-tables, it is not necessary to implement the
 175         * invalidate_range_start()/end() notifiers, as
 176         * invalidate_range() already catches the points in time when an
 177         * external TLB range needs to be flushed. For a more in-depth
 178         * discussion on this see Documentation/vm/mmu_notifier.rst
 179         *
 180         * Note that this function might be called with just a sub-range
 181         * of what was passed to invalidate_range_start()/end(), if
 182         * called between those functions.
 183         *
 184         * If this callback cannot block, and invalidate_range_{start,end}
 185         * cannot block, mmu_notifier_ops.flags should have
 186         * MMU_INVALIDATE_DOES_NOT_BLOCK set.
 187         */
 188        void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
 189                                 unsigned long start, unsigned long end);
 190
 191        RH_KABI_RESERVE(1)
 192        RH_KABI_RESERVE(2)
 193        RH_KABI_RESERVE(3)
 194        RH_KABI_RESERVE(4)
 195};
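
/*
 * Illustrative sketch only (assumptions, not kernel code): an ops table
 * whose invalidate_range_start()/end() pair forbids establishing new
 * sptes for the whole duration of the critical section, as described
 * above.  "struct my_gpu" with its lock and active_invalidates counter,
 * the embedded "mn" member and my_gpu_unmap_range() are hypothetical.
 *
 *      static void my_gpu_invalidate_range_start(struct mmu_notifier *mn,
 *                                                struct mm_struct *mm,
 *                                                unsigned long start,
 *                                                unsigned long end)
 *      {
 *              struct my_gpu *gpu = container_of(mn, struct my_gpu, mn);
 *
 *              // while active_invalidates is elevated the driver's fault
 *              // path must not establish sptes in this mm
 *              spin_lock(&gpu->lock);
 *              gpu->active_invalidates++;
 *              spin_unlock(&gpu->lock);
 *
 *              // tear down existing sptes and flush the secondary TLB
 *              my_gpu_unmap_range(gpu, start, end);
 *      }
 *
 *      static void my_gpu_invalidate_range_end(struct mmu_notifier *mn,
 *                                              struct mm_struct *mm,
 *                                              unsigned long start,
 *                                              unsigned long end)
 *      {
 *              struct my_gpu *gpu = container_of(mn, struct my_gpu, mn);
 *
 *              spin_lock(&gpu->lock);
 *              gpu->active_invalidates--;
 *              spin_unlock(&gpu->lock);
 *      }
 *
 *      static const struct mmu_notifier_ops my_gpu_mmu_notifier_ops = {
 *              .release                = my_gpu_release,
 *              .clear_flush_young      = my_gpu_clear_flush_young,
 *              .invalidate_range_start = my_gpu_invalidate_range_start,
 *              .invalidate_range_end   = my_gpu_invalidate_range_end,
 *      };
 */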
 196
 197/*
 198 * The notifier chains are protected by mmap_sem and/or the reverse map
 199 * semaphores. Notifier chains are only changed when all reverse maps and
 200 * the mmap_sem locks are taken.
 201 *
 202 * Therefore notifier chains can only be traversed when either
 203 *
 204 * 1. mmap_sem is held.
 205 * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
 206 * 3. No other concurrent thread can access the list (release)
 207 */
 208struct mmu_notifier {
 209        struct hlist_node hlist;
 210        const struct mmu_notifier_ops *ops;
 211        RH_KABI_RESERVE(1)
 212        RH_KABI_RESERVE(2)
 213};
 214
 215static inline int mm_has_notifiers(struct mm_struct *mm)
 216{
 217        return unlikely(mm->mmu_notifier_mm);
 218}
 219
 220extern int mmu_notifier_register(struct mmu_notifier *mn,
 221                                 struct mm_struct *mm);
 222extern int __mmu_notifier_register(struct mmu_notifier *mn,
 223                                   struct mm_struct *mm);
 224extern void mmu_notifier_unregister(struct mmu_notifier *mn,
 225                                    struct mm_struct *mm);
 226extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 227                                               struct mm_struct *mm);
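
/*
 * Illustrative sketch only (hypothetical driver code): registering a
 * notifier against a task's mm.  mmu_notifier_register() takes mmap_sem
 * internally and may sleep; it can fail (for example with -ENOMEM), so
 * the return value must be checked.  The notifier must stay allocated
 * until after mmu_notifier_unregister() returns.
 *
 *      static int my_gpu_attach(struct my_gpu *gpu, struct mm_struct *mm)
 *      {
 *              gpu->mn.ops = &my_gpu_mmu_notifier_ops;  // see sketch above
 *              return mmu_notifier_register(&gpu->mn, mm);
 *      }
 *
 *      static void my_gpu_detach(struct my_gpu *gpu, struct mm_struct *mm)
 *      {
 *              // ->release() also runs if the mm exits first, but the
 *              // notifier must still be unregistered afterwards
 *              mmu_notifier_unregister(&gpu->mn, mm);
 *      }
 */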
 228extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 229extern void __mmu_notifier_release(struct mm_struct *mm);
 230extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 231                                          unsigned long start,
 232                                          unsigned long end);
 233extern int __mmu_notifier_clear_young(struct mm_struct *mm,
 234                                      unsigned long start,
 235                                      unsigned long end);
 236extern int __mmu_notifier_test_young(struct mm_struct *mm,
 237                                     unsigned long address);
 238extern void __mmu_notifier_change_pte(struct mm_struct *mm,
 239                                      unsigned long address, pte_t pte);
 240extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 241                                  unsigned long start, unsigned long end);
 242extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 243                                  unsigned long start, unsigned long end,
 244                                  bool only_end);
 245extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
 246                                  unsigned long start, unsigned long end);
 247extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm);
 248
 249static inline void mmu_notifier_release(struct mm_struct *mm)
 250{
 251        if (mm_has_notifiers(mm))
 252                __mmu_notifier_release(mm);
 253}
 254
 255static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 256                                          unsigned long start,
 257                                          unsigned long end)
 258{
 259        if (mm_has_notifiers(mm))
 260                return __mmu_notifier_clear_flush_young(mm, start, end);
 261        return 0;
 262}
 263
 264static inline int mmu_notifier_clear_young(struct mm_struct *mm,
 265                                           unsigned long start,
 266                                           unsigned long end)
 267{
 268        if (mm_has_notifiers(mm))
 269                return __mmu_notifier_clear_young(mm, start, end);
 270        return 0;
 271}
 272
 273static inline int mmu_notifier_test_young(struct mm_struct *mm,
 274                                          unsigned long address)
 275{
 276        if (mm_has_notifiers(mm))
 277                return __mmu_notifier_test_young(mm, address);
 278        return 0;
 279}
 280
 281static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 282                                           unsigned long address, pte_t pte)
 283{
 284        if (mm_has_notifiers(mm))
 285                __mmu_notifier_change_pte(mm, address, pte);
 286}
 287
 288static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 289                                  unsigned long start, unsigned long end)
 290{
 291        if (mm_has_notifiers(mm))
 292                __mmu_notifier_invalidate_range_start(mm, start, end);
 293}
 294
 295static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 296                                  unsigned long start, unsigned long end)
 297{
 298        if (mm_has_notifiers(mm))
 299                __mmu_notifier_invalidate_range_end(mm, start, end, false);
 300}
 301
 302static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
 303                                  unsigned long start, unsigned long end)
 304{
 305        if (mm_has_notifiers(mm))
 306                __mmu_notifier_invalidate_range_end(mm, start, end, true);
 307}
 308
 309static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
 310                                  unsigned long start, unsigned long end)
 311{
 312        if (mm_has_notifiers(mm))
 313                __mmu_notifier_invalidate_range(mm, start, end);
 314}
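
/*
 * Illustrative caller-side pattern (a sketch of how core mm code uses
 * the wrappers above, not a copy of any particular call site): page
 * table teardown is bracketed by the start/end pair, and every wrapper
 * degenerates to a no-op when the mm has no notifiers registered.
 *
 *      mmu_notifier_invalidate_range_start(mm, start, end);
 *      // ... clear the page table entries for [start, end) and flush
 *      // the CPU TLB; secondary TLBs are flushed either from
 *      // ->invalidate_range() or, at the latest, from
 *      // mmu_notifier_invalidate_range_end() ...
 *      mmu_notifier_invalidate_range_end(mm, start, end);
 */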
 315
 316static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 317{
 318        mm->mmu_notifier_mm = NULL;
 319}
 320
 321static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 322{
 323        if (mm_has_notifiers(mm))
 324                __mmu_notifier_mm_destroy(mm);
 325}
 326
 327#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
 328({                                                                      \
 329        int __young;                                                    \
 330        struct vm_area_struct *___vma = __vma;                          \
 331        unsigned long ___address = __address;                           \
 332        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
 333        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
 334                                                  ___address,           \
 335                                                  ___address +          \
 336                                                        PAGE_SIZE);     \
 337        __young;                                                        \
 338})
 339
 340#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)         \
 341({                                                                      \
 342        int __young;                                                    \
 343        struct vm_area_struct *___vma = __vma;                          \
 344        unsigned long ___address = __address;                           \
 345        __young = pmdp_clear_flush_young(___vma, ___address, __pmdp);   \
 346        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
 347                                                  ___address,           \
 348                                                  ___address +          \
 349                                                        PMD_SIZE);      \
 350        __young;                                                        \
 351})
 352
 353#define ptep_clear_young_notify(__vma, __address, __ptep)               \
 354({                                                                      \
 355        int __young;                                                    \
 356        struct vm_area_struct *___vma = __vma;                          \
 357        unsigned long ___address = __address;                           \
 358        __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\
 359        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
 360                                            ___address + PAGE_SIZE);    \
 361        __young;                                                        \
 362})
 363
 364#define pmdp_clear_young_notify(__vma, __address, __pmdp)               \
 365({                                                                      \
 366        int __young;                                                    \
 367        struct vm_area_struct *___vma = __vma;                          \
 368        unsigned long ___address = __address;                           \
 369        __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\
 370        __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address,  \
 371                                            ___address + PMD_SIZE);     \
 372        __young;                                                        \
 373})
 374
 375#define ptep_clear_flush_notify(__vma, __address, __ptep)               \
 376({                                                                      \
 377        unsigned long ___addr = __address & PAGE_MASK;                  \
 378        struct mm_struct *___mm = (__vma)->vm_mm;                       \
 379        pte_t ___pte;                                                   \
 380                                                                        \
 381        ___pte = ptep_clear_flush(__vma, __address, __ptep);            \
 382        mmu_notifier_invalidate_range(___mm, ___addr,                   \
 383                                        ___addr + PAGE_SIZE);           \
 384                                                                        \
 385        ___pte;                                                         \
 386})
 387
 388#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd)             \
 389({                                                                      \
 390        unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
 391        struct mm_struct *___mm = (__vma)->vm_mm;                       \
 392        pmd_t ___pmd;                                                   \
 393                                                                        \
 394        ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd);          \
 395        mmu_notifier_invalidate_range(___mm, ___haddr,                  \
 396                                      ___haddr + HPAGE_PMD_SIZE);       \
 397                                                                        \
 398        ___pmd;                                                         \
 399})
 400
 401#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud)             \
 402({                                                                      \
 403        unsigned long ___haddr = __haddr & HPAGE_PUD_MASK;              \
 404        struct mm_struct *___mm = (__vma)->vm_mm;                       \
 405        pud_t ___pud;                                                   \
 406                                                                        \
 407        ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud);          \
 408        mmu_notifier_invalidate_range(___mm, ___haddr,                  \
 409                                      ___haddr + HPAGE_PUD_SIZE);       \
 410                                                                        \
 411        ___pud;                                                         \
 412})
 413
 414/*
 415 * set_pte_at_notify() sets the pte _after_ running the notifier.
 416 * It is safe to update the secondary MMUs first because the primary MMU
 417 * pte invalidation must already have happened with a ptep_clear_flush() before
 418 * set_pte_at_notify() is invoked.  Updating the secondary MMUs first is
 419 * required when we change both the protection of the mapping from read-only to
 420 * read-write and the pfn (like during copy on write page faults). Otherwise the
 421 * old page would remain mapped readonly in the secondary MMUs after the new
 422 * page is already writable by some CPU through the primary MMU.
 423 */
 424#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
 425({                                                                      \
 426        struct mm_struct *___mm = __mm;                                 \
 427        unsigned long ___address = __address;                           \
 428        pte_t ___pte = __pte;                                           \
 429                                                                        \
 430        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
 431        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
 432})
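
/*
 * Illustrative sketch of the copy-on-write ordering described above
 * (a hypothetical fragment, not the actual write-fault code): the old
 * pte is cleared and flushed first, then set_pte_at_notify() lets the
 * secondary MMUs switch to the new page via ->change_pte before the
 * new, writable pte becomes visible through the primary MMU.
 *
 *      entry = mk_pte(new_page, vma->vm_page_prot);
 *      entry = pte_mkwrite(pte_mkdirty(entry));
 *      ptep_clear_flush_notify(vma, address, ptep);
 *      set_pte_at_notify(mm, address, ptep, entry);
 */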
 433
 434extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
 435                                   void (*func)(struct rcu_head *rcu));
 436extern void mmu_notifier_synchronize(void);
 437
 438#else /* CONFIG_MMU_NOTIFIER */
 439
 440static inline int mm_has_notifiers(struct mm_struct *mm)
 441{
 442        return 0;
 443}
 444
 445static inline void mmu_notifier_release(struct mm_struct *mm)
 446{
 447}
 448
 449static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
 450                                          unsigned long start,
 451                                          unsigned long end)
 452{
 453        return 0;
 454}
 455
 456static inline int mmu_notifier_test_young(struct mm_struct *mm,
 457                                          unsigned long address)
 458{
 459        return 0;
 460}
 461
 462static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 463                                           unsigned long address, pte_t pte)
 464{
 465}
 466
 467static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 468                                  unsigned long start, unsigned long end)
 469{
 470}
 471
 472static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 473                                  unsigned long start, unsigned long end)
 474{
 475}
 476
 477static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm,
 478                                  unsigned long start, unsigned long end)
 479{
 480}
 481
 482static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
 483                                  unsigned long start, unsigned long end)
 484{
 485}
 486
 487static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
 488{
 489        return false;
 490}
 491
 492static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 493{
 494}
 495
 496static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 497{
 498}
 499
 500#define ptep_clear_flush_young_notify ptep_clear_flush_young
 501#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
 502#define ptep_clear_young_notify ptep_test_and_clear_young
 503#define pmdp_clear_young_notify pmdp_test_and_clear_young
 504#define ptep_clear_flush_notify ptep_clear_flush
 505#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
 506#define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 507#define set_pte_at_notify set_pte_at
 508
 509#endif /* CONFIG_MMU_NOTIFIER */
 510
 511#endif /* _LINUX_MMU_NOTIFIER_H */
 512