linux/include/linux/mmu_notifier.h
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and it's released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
        /* all mmu notifiers registered in this mm are queued in this list */
        struct hlist_head list;
        /* to serialize the list modifications and hlist_unhashed */
        spinlock_t lock;
};

struct mmu_notifier_ops {
        /*
         * Called either by mmu_notifier_unregister or when the mm is
         * being destroyed by exit_mmap, always before all pages are
         * freed. This can run concurrently with other mmu notifier
         * methods (the ones invoked outside the mm context) and it
         * should tear down all secondary mmu mappings and freeze the
         * secondary mmu. If this method isn't implemented you have to
         * be sure that nothing could possibly write to the pages
         * through the secondary mmu by the time the last thread with
         * tsk->mm == mm exits.
         *
         * As a side note: the pages freed after ->release returns could
         * be immediately reallocated by the gart at an alias physical
         * address with a different cache model, so if ->release isn't
         * implemented because all _software_ driven memory accesses
         * through the secondary mmu are terminated by the time the
         * last thread of this mm quits, you also have to be sure that
         * speculative _hardware_ operations can't allocate dirty
         * cachelines in the cpu that could not be snooped and made
         * coherent with the other read and write operations happening
         * through the gart alias address, thus leading to memory
         * corruption.
         */
        void (*release)(struct mmu_notifier *mn,
                        struct mm_struct *mm);

        /*
         * clear_flush_young is called after the VM test-and-clears
         * the young/accessed bitflag in the pte. This way the VM
         * provides proper aging for accesses to the page through the
         * secondary MMUs and not only for the ones through the Linux
         * pte.
         */
        int (*clear_flush_young)(struct mmu_notifier *mn,
                                 struct mm_struct *mm,
                                 unsigned long address);

        /*
         * test_young is called to check the young/accessed bitflag in
         * the secondary pte. This is used to know whether the page is
         * frequently used without actually clearing the flag or tearing
         * down the secondary mapping on the page.
         */
        int (*test_young)(struct mmu_notifier *mn,
                          struct mm_struct *mm,
                          unsigned long address);

        /*
         * change_pte is called in cases where the pte mapping a page is
         * changed: for example, when ksm remaps the pte to point to a
         * new shared page.
         */
        void (*change_pte)(struct mmu_notifier *mn,
                           struct mm_struct *mm,
                           unsigned long address,
                           pte_t pte);

        /*
         * Before this is invoked any secondary MMU may still
         * read/write the page previously pointed to by the Linux
         * pte because the page hasn't been freed yet and it won't be
         * freed until this returns. If required, set_page_dirty has to
         * be called internally by this method.
         */
        void (*invalidate_page)(struct mmu_notifier *mn,
                                struct mm_struct *mm,
                                unsigned long address);

        /*
         * invalidate_range_start() and invalidate_range_end() must be
         * paired and are called only when the mmap_sem and/or the
         * locks protecting the reverse maps are held. The subsystem
         * must guarantee that no additional references are taken to
         * the pages in the range established between the call to
         * invalidate_range_start() and the matching call to
         * invalidate_range_end().
         *
         * Invalidation of multiple concurrent ranges may be
         * optionally permitted by the driver. Either way the
         * establishment of sptes is forbidden in the range passed to
         * invalidate_range_start/end for the whole duration of the
         * invalidate_range_start/end critical section.
         *
         * invalidate_range_start() is called when all pages in the
         * range are still mapped and have at least a refcount of one.
         *
         * invalidate_range_end() is called when all pages in the
         * range have been unmapped and the pages have been freed by
         * the VM.
         *
         * The VM will remove the page table entries and potentially
         * the page between invalidate_range_start() and
         * invalidate_range_end(). If the page must not be freed
         * because of pending I/O or other circumstances then the
         * invalidate_range_start() callback (or the initial mapping
         * by the driver) must make sure that the refcount is kept
         * elevated.
         *
         * If the driver increases the refcount when the pages are
         * initially mapped into an address space then either
         * invalidate_range_start() or invalidate_range_end() may
         * decrease the refcount. If the refcount is decreased on
         * invalidate_range_start() then the VM can free pages as page
         * table entries are removed.  If the refcount is only
         * dropped on invalidate_range_end() then the driver itself
         * will drop the last refcount but it must take care to flush
         * any secondary tlb before doing the final free on the
         * page. Pages will no longer be referenced by the linux
         * address space but may still be referenced by sptes until
         * the last refcount is dropped.
         */
        void (*invalidate_range_start)(struct mmu_notifier *mn,
                                       struct mm_struct *mm,
                                       unsigned long start, unsigned long end);
        void (*invalidate_range_end)(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end);
};

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex).
 * 3. No other concurrent thread can access the list (release)
 */
struct mmu_notifier {
        struct hlist_node hlist;
        const struct mmu_notifier_ops *ops;
};
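
/*
 * Illustrative sketch (not part of this header): how a hypothetical
 * driver might fill in the ops above for a device with its own
 * secondary MMU.  Every my_* name is made up for the example; a real
 * driver only needs to implement the callbacks it cares about.
 */
#if 0   /* example only, never compiled */
struct my_dev {
        struct mmu_notifier notifier;
        /* ... device state, secondary page tables, ... */
};

static void my_notifier_release(struct mmu_notifier *mn,
                                struct mm_struct *mm)
{
        /* Tear down all secondary mappings and stop DMA to this mm. */
        my_shutdown_secondary_mmu(container_of(mn, struct my_dev, notifier));
}

static void my_notifier_invalidate_range_start(struct mmu_notifier *mn,
                                               struct mm_struct *mm,
                                               unsigned long start,
                                               unsigned long end)
{
        /* Zap sptes in [start, end) and flush the secondary TLB. */
        my_zap_secondary_range(container_of(mn, struct my_dev, notifier),
                               start, end);
}

static const struct mmu_notifier_ops my_notifier_ops = {
        .release                = my_notifier_release,
        .invalidate_range_start = my_notifier_invalidate_range_start,
        /* .invalidate_range_end, .clear_flush_young, ... as needed */
};
#endif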

static inline int mm_has_notifiers(struct mm_struct *mm)
{
        return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
                                 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
                                   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
                                    struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long address);
extern int __mmu_notifier_test_young(struct mm_struct *mm,
                                     unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
                                      unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end);

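/*
 * Illustrative sketch (not part of this header): registering and
 * unregistering the hypothetical notifier from the sketch above.
 * mmu_notifier_register() takes mmap_sem internally; use
 * __mmu_notifier_register() only when the caller already holds it
 * for writing.
 */
#if 0   /* example only, never compiled */
static int my_dev_attach(struct my_dev *dev, struct mm_struct *mm)
{
        dev->notifier.ops = &my_notifier_ops;
        /* From here on the callbacks may fire for this mm. */
        return mmu_notifier_register(&dev->notifier, mm);
}

static void my_dev_detach(struct my_dev *dev, struct mm_struct *mm)
{
        /* After this returns the notifier will not be called again. */
        mmu_notifier_unregister(&dev->notifier, mm);
}
#endif
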
static inline void mmu_notifier_release(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_clear_flush_young(mm, address);
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                return __mmu_notifier_test_young(mm, address);
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_change_pte(mm, address, pte);
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_invalidate_range_end(mm, start, end);
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
        mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
        if (mm_has_notifiers(mm))
                __mmu_notifier_mm_destroy(mm);
}
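
/*
 * Illustrative sketch (not part of this header): the calling
 * convention the wrappers above implement.  Core mm code brackets a
 * page table teardown with the range notifiers; both calls collapse
 * to nothing when no notifier is registered.  my_unmap_range() is a
 * made-up name for the example.
 */
#if 0   /* example only, never compiled */
static void my_unmap_range(struct vm_area_struct *vma,
                           unsigned long start, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;

        mmu_notifier_invalidate_range_start(mm, start, end);
        /*
         * ... clear the Linux ptes and flush the TLB for [start, end);
         * no new sptes may be established until ..._range_end() runs.
         */
        mmu_notifier_invalidate_range_end(mm, start, end);
}
#endif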

/*
 * These two macros will some day replace ptep_clear_flush.
 * ptep_clear_flush is implemented as a macro itself, so this also is
 * implemented as a macro until ptep_clear_flush is converted to an
 * inline function, to diminish the risk of compilation failure. The
 * invalidate_page method can over time be moved outside the PT lock
 * and these two macros can later be removed.
 */
#define ptep_clear_flush_notify(__vma, __address, __ptep)               \
({                                                                      \
        pte_t __pte;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __pte = ptep_clear_flush(___vma, ___address, __ptep);           \
        mmu_notifier_invalidate_page(___vma->vm_mm, ___address);        \
        __pte;                                                          \
})

#define pmdp_clear_flush_notify(__vma, __address, __pmdp)               \
({                                                                      \
        pmd_t __pmd;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        VM_BUG_ON(__address & ~HPAGE_PMD_MASK);                         \
        mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,  \
                                            (__address)+HPAGE_PMD_SIZE);\
        __pmd = pmdp_clear_flush(___vma, ___address, __pmdp);           \
        mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,    \
                                          (__address)+HPAGE_PMD_SIZE);  \
        __pmd;                                                          \
})

#define pmdp_splitting_flush_notify(__vma, __address, __pmdp)           \
({                                                                      \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        VM_BUG_ON(__address & ~HPAGE_PMD_MASK);                         \
        mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address,  \
                                            (__address)+HPAGE_PMD_SIZE);\
        pmdp_splitting_flush(___vma, ___address, __pmdp);               \
        mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address,    \
                                          (__address)+HPAGE_PMD_SIZE);  \
})

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address);          \
        __young;                                                        \
})

#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp)         \
({                                                                      \
        int __young;                                                    \
        struct vm_area_struct *___vma = __vma;                          \
        unsigned long ___address = __address;                           \
        __young = pmdp_clear_flush_young(___vma, ___address, __pmdp);   \
        __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                  ___address);          \
        __young;                                                        \
})

#define set_pte_at_notify(__mm, __address, __ptep, __pte)               \
({                                                                      \
        struct mm_struct *___mm = __mm;                                 \
        unsigned long ___address = __address;                           \
        pte_t ___pte = __pte;                                           \
                                                                        \
        set_pte_at(___mm, ___address, __ptep, ___pte);                  \
        mmu_notifier_change_pte(___mm, ___address, ___pte);             \
})

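/*
 * Illustrative sketch (not part of this header): a KSM-style caller
 * replacing a pte with one pointing at a shared page.  The old
 * translation is flushed from the Linux page tables, and
 * set_pte_at_notify() lets secondary MMUs pick up the new one via
 * ->change_pte() instead of a full invalidate.  my_replace_pte() is
 * a made-up name for the example.
 */
#if 0   /* example only, never compiled */
static void my_replace_pte(struct vm_area_struct *vma, unsigned long addr,
                           pte_t *ptep, pte_t newpte)
{
        struct mm_struct *mm = vma->vm_mm;

        flush_cache_page(vma, addr, pte_pfn(*ptep));
        ptep_clear_flush(vma, addr, ptep);              /* drop old pte */
        set_pte_at_notify(mm, addr, ptep, newpte);      /* install + notify */
}
#endif
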
#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline int mmu_notifier_test_young(struct mm_struct *mm,
                                          unsigned long address)
{
        return 0;
}

static inline void mmu_notifier_change_pte(struct mm_struct *mm,
                                           unsigned long address, pte_t pte)
{
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
                                          unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
                                  unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_flush_notify ptep_clear_flush
#define pmdp_clear_flush_notify pmdp_clear_flush
#define pmdp_splitting_flush_notify pmdp_splitting_flush
#define set_pte_at_notify set_pte_at

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */