linux/arch/s390/mm/pgtable.c
/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

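/*
 * Invalidate a pte synchronously: flush the TLB entry with IPTE,
 * using the local variant when this CPU is the only one that has
 * the mm attached.
 */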
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        int active, count;
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        active = (mm == current->active_mm) ? 1 : 0;
        count = atomic_add_return(0x10000, &mm->context.attach_count);
        if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __ptep_ipte_local(addr, ptep);
        else
                __ptep_ipte(addr, ptep);
        atomic_sub(0x10000, &mm->context.attach_count);
        return old;
}

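/*
 * Invalidate a pte lazily: if no other CPU has the mm attached, just
 * mark the pte invalid and leave the TLB flush to a later
 * flush_tlb_mm(); otherwise invalidate with IPTE right away.
 */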
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pte_t *ptep)
{
        int active, count;
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        active = (mm == current->active_mm) ? 1 : 0;
        count = atomic_add_return(0x10000, &mm->context.attach_count);
        if ((count & 0xffff) <= active) {
                pte_val(*ptep) |= _PAGE_INVALID;
                mm->context.flush_mm = 1;
        } else
                __ptep_ipte(addr, ptep);
        atomic_sub(0x10000, &mm->context.attach_count);
        return old;
}

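/*
 * The PGSTE (page status table entry) of a pte is located PTRS_PER_PTE
 * entries behind the pte in the same page table. pgste_get_lock()
 * acquires its PCL lock bit with a compare-and-swap loop (preemption
 * disabled), pgste_set_unlock() stores the new value and drops the
 * lock again.
 */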
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
        unsigned long new = 0;
#ifdef CONFIG_PGSTE
        unsigned long old;

        preempt_disable();
        asm(
                "       lg      %0,%2\n"
                "0:     lgr     %1,%0\n"
                "       nihh    %0,0xff7f\n"    /* clear PCL bit in old */
                "       oihh    %1,0x0080\n"    /* set PCL bit in new */
                "       csg     %0,%1,%2\n"
                "       jl      0b\n"
                : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
                : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
        return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        asm(
                "       nihh    %1,0xff7f\n"    /* clear PCL bit */
                "       stg     %1,%0\n"
                : "=Q" (ptep[PTRS_PER_PTE])
                : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
                : "cc", "memory");
        preempt_enable();
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
        unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
        pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
        return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

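/*
 * Transfer the storage key of a mapped page into the PGSTE: the
 * hardware referenced and changed bits become the guest GR/GC bits,
 * and the access-control bits plus the fetch-protection bit are
 * copied into the key portion of the PGSTE.
 */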
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
                                       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address, bits, skey;

        if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
                return pgste;
        address = pte_val(pte) & PAGE_MASK;
        skey = (unsigned long) page_get_storage_key(address);
        bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
        /* Transfer page changed & referenced bit to guest bits in pgste */
        pgste_val(pgste) |= bits << 48;         /* GR bit & GC bit */
        /* Copy page access key and fetch protection bit to pgste */
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
        pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
        return pgste;
}

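/*
 * Counterpart of pgste_update_all(): when a pte becomes valid, set
 * the storage key of the page from the access key and fetch
 * protection bit saved in the PGSTE. The guest view of the
 * referenced/changed state stays in the PGSTE, so the real key
 * starts out with R/C cleared.
 */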
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
                                 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address;
        unsigned long nkey;

        if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
                return;
        VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
        address = pte_val(entry) & PAGE_MASK;
        /*
         * Set page access key and fetch protection bit from pgste.
         * The guest C/R information is still in the PGSTE, set real
         * key C/R to 0.
         */
        nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
        nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
        page_set_storage_key(address, nkey, 0);
#endif
}

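/*
 * Install a pte and maintain the software dirty tracking needed for
 * guests: without ESOP every writable pte is made dirty up front,
 * and any pte that allows write access sets the user-dirty (UC) bit
 * in the PGSTE.
 */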
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
        if ((pte_val(entry) & _PAGE_PRESENT) &&
            (pte_val(entry) & _PAGE_WRITE) &&
            !(pte_val(entry) & _PAGE_INVALID)) {
                if (!MACHINE_HAS_ESOP) {
                        /*
                         * Without enhanced suppression-on-protection force
                         * the dirty bit on for all writable ptes.
                         */
                        pte_val(entry) |= _PAGE_DIRTY;
                        pte_val(entry) &= ~_PAGE_PROTECT;
                }
                if (!(pte_val(entry) & _PAGE_PROTECT))
                        /* This pte allows write access, set user-dirty */
                        pgste_val(pgste) |= PGSTE_UC_BIT;
        }
#endif
        *ptep = entry;
        return pgste;
}

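/*
 * If a notifier has marked this pte with the IN (invalidation) bit,
 * clear the bit and call ptep_notify() before the pte is replaced.
 */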
static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
                                        unsigned long addr,
                                        pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        if (pgste_val(pgste) & PGSTE_IN_BIT) {
                pgste_val(pgste) &= ~PGSTE_IN_BIT;
                ptep_notify(mm, addr, ptep);
        }
#endif
        return pgste;
}

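/*
 * ptep_xchg_start() and ptep_xchg_commit() bracket a pte exchange:
 * start takes the PGSTE lock and runs the invalidation notifier,
 * commit transfers key and usage state between pte and PGSTE and
 * stores the new pte before dropping the lock again.
 */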
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        pgste_t pgste = __pgste(0);

        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
                pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
        }
        return pgste;
}

static inline void ptep_xchg_commit(struct mm_struct *mm,
                                    unsigned long addr, pte_t *ptep,
                                    pgste_t pgste, pte_t old, pte_t new)
{
        if (mm_has_pgste(mm)) {
                if (pte_val(old) & _PAGE_INVALID)
                        pgste_set_key(ptep, pgste, new, mm);
                if (pte_val(new) & _PAGE_INVALID) {
                        pgste = pgste_update_all(old, pgste, mm);
                        if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
                            _PGSTE_GPS_USAGE_UNUSED)
                                pte_val(old) |= _PAGE_UNUSED;
                }
                pgste = pgste_set_pte(ptep, pgste, new);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = new;
        }
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_direct(mm, addr, ptep);
        ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep)
{
        pgste_t pgste;
        pte_t old;

        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        if (mm_has_pgste(mm)) {
                pgste = pgste_update_all(old, pgste, mm);
                pgste_set(ptep, pgste);
        }
        return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep, pte_t pte)
{
        pgste_t pgste;

        if (mm_has_pgste(mm)) {
                pgste = pgste_get(ptep);
                pgste_set_key(ptep, pgste, pte, mm);
                pgste = pgste_set_pte(ptep, pgste, pte);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = pte;
        }
}
EXPORT_SYMBOL(ptep_modify_prot_commit);

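/*
 * Segment-table (pmd) counterparts of the pte flush helpers: use the
 * IDTE instruction if available, with the local variant when only
 * this CPU has the mm attached, and fall back to CSP on machines
 * without IDTE.
 */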
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pmd_t *pmdp)
{
        int active, count;
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        if (!MACHINE_HAS_IDTE) {
                __pmdp_csp(pmdp);
                return old;
        }
        active = (mm == current->active_mm) ? 1 : 0;
        count = atomic_add_return(0x10000, &mm->context.attach_count);
        if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __pmdp_idte_local(addr, pmdp);
        else
                __pmdp_idte(addr, pmdp);
        atomic_sub(0x10000, &mm->context.attach_count);
        return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pmd_t *pmdp)
{
        int active, count;
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        active = (mm == current->active_mm) ? 1 : 0;
        count = atomic_add_return(0x10000, &mm->context.attach_count);
        if ((count & 0xffff) <= active) {
                pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
                mm->context.flush_mm = 1;
        } else if (MACHINE_HAS_IDTE)
                __pmdp_idte(addr, pmdp);
        else
                __pmdp_csp(pmdp);
        atomic_sub(0x10000, &mm->context.attach_count);
        return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        old = pmdp_flush_direct(mm, addr, pmdp);
        *pmdp = new;
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        old = pmdp_flush_lazy(mm, addr, pmdp);
        *pmdp = new;
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
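/*
 * Deposit a preallocated page table for a huge pmd so that it can be
 * reused if the huge mapping is split later. Deposited tables are
 * chained through a list_head that is stored in the page table
 * itself, which is why pgtable_trans_huge_withdraw() resets the
 * first two pte slots to _PAGE_INVALID before handing a table back.
 */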
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                pgtable_t pgtable)
{
        struct list_head *lh = (struct list_head *) pgtable;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        if (!pmd_huge_pte(mm, pmdp))
                INIT_LIST_HEAD(lh);
        else
                list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
        pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
        struct list_head *lh;
        pgtable_t pgtable;
        pte_t *ptep;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        pgtable = pmd_huge_pte(mm, pmdp);
        lh = (struct list_head *) pgtable;
        if (list_empty(lh))
                pmd_huge_pte(mm, pmdp) = NULL;
        else {
                pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
                list_del(lh);
        }
        ptep = (pte_t *) pgtable;
        pte_val(*ptep) = _PAGE_INVALID;
        ptep++;
        pte_val(*ptep) = _PAGE_INVALID;
        return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
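/*
 * Set a pte in an mm with pgstes: clear the logical-zero state and
 * propagate the saved storage key while holding the PGSTE lock.
 */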
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        pgste_t pgste;

        /* the mm_has_pgste() check is done in set_pte_at() */
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
        pgste_set_key(ptep, pgste, entry, mm);
        pgste = pgste_set_pte(ptep, pgste, entry);
        pgste_set_unlock(ptep, pgste);
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        pgste_t pgste;

        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) |= PGSTE_IN_BIT;
        pgste_set_unlock(ptep, pgste);
}

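/*
 * Release the swap or migration entry of a zapped guest page and
 * keep the mm counters in sync.
 */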
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
        if (!non_swap_entry(entry))
                dec_mm_counter(mm, MM_SWAPENTS);
        else if (is_migration_entry(entry)) {
                struct page *page = migration_entry_to_page(entry);

                dec_mm_counter(mm, mm_counter(page));
        }
        free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, int reset)
{
        unsigned long pgstev;
        pgste_t pgste;
        pte_t pte;

        /* Zap unused and logically-zero pages */
        pgste = pgste_get_lock(ptep);
        pgstev = pgste_val(pgste);
        pte = *ptep;
        if (!reset && pte_swap(pte) &&
            ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
             (pgstev & _PGSTE_GPS_ZERO))) {
                ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
                pte_clear(mm, addr, ptep);
        }
        if (reset)
                pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
        pgste_set_unlock(ptep, pgste);
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        unsigned long ptev;
        pgste_t pgste;

        /* Clear storage key */
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
                              PGSTE_GR_BIT | PGSTE_GC_BIT);
        ptev = pte_val(*ptep);
        if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
                page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
        pgste_set_unlock(ptep, pgste);
}

/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
        spinlock_t *ptl;
        pgste_t pgste;
        pte_t *ptep;
        pte_t pte;
        bool dirty;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return false;

        pgste = pgste_get_lock(ptep);
        dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
        pgste_val(pgste) &= ~PGSTE_UC_BIT;
        pte = *ptep;
        if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
                pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
                __ptep_ipte(addr, ptep);
                if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
                        pte_val(pte) |= _PAGE_PROTECT;
                else
                        pte_val(pte) |= _PAGE_INVALID;
                *ptep = pte;
        }
        pgste_set_unlock(ptep, pgste);

        spin_unlock(ptl);
        return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);

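/*
 * Set the guest storage key for a page: update the ACC, FP, GR and
 * GC bits in the PGSTE and, if the page is mapped, the real storage
 * key as well. Any change of the key is treated as a change of the
 * page and sets the user-dirty bit.
 */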
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned char key, bool nq)
{
        unsigned long keyul;
        spinlock_t *ptl;
        pgste_t old, new;
        pte_t *ptep;

        down_read(&mm->mmap_sem);
        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep)) {
                up_read(&mm->mmap_sem);
                return -EFAULT;
        }

        new = old = pgste_get_lock(ptep);
        pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
                            PGSTE_ACC_BITS | PGSTE_FP_BIT);
        keyul = (unsigned long) key;
        pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
        pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
        if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                unsigned long address, bits, skey;

                address = pte_val(*ptep) & PAGE_MASK;
                skey = (unsigned long) page_get_storage_key(address);
                bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
                skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
                /* Set storage key ACC and FP */
                page_set_storage_key(address, skey, !nq);
                /* Merge host changed & referenced into pgste */
                pgste_val(new) |= bits << 52;
        }
        /* changing the guest storage key is considered a change of the page */
        if ((pgste_val(new) ^ pgste_val(old)) &
            (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
                pgste_val(new) |= PGSTE_UC_BIT;

        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(ptep, ptl);
        up_read(&mm->mmap_sem);
        return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

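/*
 * Read the guest view of the storage key of a page: taken from the
 * PGSTE for unmapped pages, otherwise from the real storage key
 * merged with the guest referenced/changed bits.
 */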
unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
        unsigned char key;
        spinlock_t *ptl;
        pgste_t pgste;
        pte_t *ptep;

        down_read(&mm->mmap_sem);
        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep)) {
                up_read(&mm->mmap_sem);
                return -EFAULT;
        }
        pgste = pgste_get_lock(ptep);

        if (pte_val(*ptep) & _PAGE_INVALID) {
                key  = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
                key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
                key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
                key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
        } else {
                key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);

                /* Reflect guest's logical view, not physical */
                if (pgste_val(pgste) & PGSTE_GR_BIT)
                        key |= _PAGE_REFERENCED;
                if (pgste_val(pgste) & PGSTE_GC_BIT)
                        key |= _PAGE_CHANGED;
        }

        pgste_set_unlock(ptep, pgste);
        pte_unmap_unlock(ptep, ptl);
        up_read(&mm->mmap_sem);
        return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif