linux/arch/s390/mm/pgtable.c
// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>

pgprot_t pgprot_writecombine(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);

pgprot_t pgprot_writethrough(pgprot_t prot)
{
	/*
	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
	 * once at init and only read afterwards.
	 */
	return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
}
EXPORT_SYMBOL_GPL(pgprot_writethrough);
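
/*
 * Usage sketch (illustrative, not part of this file): a driver mapping
 * PCI MIO space with write-combining semantics would typically do
 *
 *	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 *	return remap_pfn_range(vma, vma->vm_start, pfn,
 *			       vma->vm_end - vma->vm_start,
 *			       vma->vm_page_prot);
 *
 * On s390 this only toggles the MIO write-back bit in the protection
 * value rather than changing a memory type as on other architectures.
 */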

static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}
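
/*
 * Note on mm->context.gmap_asce (a reading of the checks above, stated
 * as an assumption): 0UL means no guest ASCE is registered, so NODAT
 * flushing suffices; -1UL means several guest ASCEs exist, so no single
 * one can be passed to IPTE; any other value is the one guest ASCE to
 * flush with IPTE_GUEST_ASCE.
 */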

static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else {
		ptep_ipte_global(mm, addr, ptep, nodat);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}
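
/*
 * Illustrative contrast (sketch, not part of this file): the direct
 * variant always issues IPTE, so the TLB entry is gone on return; the
 * lazy variant only marks the PTE invalid when the mm is attached to
 * this CPU alone and defers the flush to the next flush_tlb_mm():
 *
 *	old = ptep_flush_direct(mm, addr, ptep, nodat); // TLB clean now
 *	old = ptep_flush_lazy(mm, addr, ptep, nodat);   // may just set
 *							// context.flush_mm
 */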

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}
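
/*
 * Rough C equivalent of the inline assembly above (illustrative only):
 *
 *	unsigned long *pgste = (unsigned long *)(ptep + PTRS_PER_PTE);
 *	unsigned long old, new;
 *	do {
 *		old = READ_ONCE(*pgste) & ~PGSTE_PCL_BIT; // expect unlocked
 *		new = old | PGSTE_PCL_BIT;		  // try to lock
 *	} while (cmpxchg(pgste, old, new) != old);
 *
 * i.e. a compare-and-swap spin on the PCL bit (0x0080 in the upper
 * halfword) of the PGSTE that follows the page table entries.
 */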

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}
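
/*
 * Layout reminder for the shifts above (an assumption about the bit
 * definitions): the architected storage key byte is ACC(4)|F|R|C|0,
 * i.e. _PAGE_ACC_BITS|_PAGE_FP_BIT is 0xf8 and R/C are 0x04/0x02.
 * Shifting the key left by 56 lines ACC/F up with PGSTE_ACC_BITS and
 * PGSTE_FP_BIT, and shifting R/C left by 48 lines them up with the
 * guest PGSTE_GR_BIT and PGSTE_GC_BIT.
 */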

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste_val(pgste) ^= bits;
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
	return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);
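
/*
 * Usage sketch (illustrative): atomically replacing a PTE while
 * retrieving the old value, e.g. to clear an entry:
 *
 *	pte_t old = ptep_xchg_direct(mm, addr, ptep,
 *				     __pte(_PAGE_INVALID));
 *
 * The PGSTE locking, guest notification and storage key transfer all
 * happen inside the start/commit pair, so callers only need to hold
 * the page table lock.
 */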

pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;
	struct mm_struct *mm = vma->vm_mm;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}

void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
			     pte_t *ptep, pte_t old_pte, pte_t pte)
{
	pgste_t pgste;
	struct mm_struct *mm = vma->vm_mm;

	if (!MACHINE_HAS_NX)
		pte_val(pte) &= ~_PAGE_NOEXEC;
	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
	preempt_enable();
}
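
/*
 * Note: ptep_modify_prot_start() returns with preemption disabled and,
 * for mms with PGSTEs, with the PGSTE still locked; both are only
 * released by the matching ptep_modify_prot_commit(). The two must be
 * used strictly as a pair, as the generic mprotect code does.
 */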

static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
	if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
		gmap_pmdp_idte_local(mm, addr);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST) {
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else if (MACHINE_HAS_IDTE) {
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_idte_global(mm, addr);
	} else {
		__pmdp_csp(pmdp);
		if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
			gmap_pmdp_csp(mm, addr);
	}
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
		if (mm_has_pgste(mm))
			gmap_pmdp_invalidate(mm, addr);
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

#ifdef CONFIG_PGSTE
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	pmd = pmd_alloc(mm, pud, addr);
	return pmd;
}
#endif

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * The invalid bit position is the same for pmd and pud,
		 * so we can reuse __pmdp_csp() here.
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	*pudp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh)) {
		pmd_huge_pte(mm, pmdp) = NULL;
	} else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
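
/*
 * Note on the two _PAGE_INVALID stores above: the list_head used for
 * the deposit list lives in the deposited page table itself and
 * overlays its first two 8-byte entries. Rewriting those two slots as
 * _PAGE_INVALID on withdraw makes the table look like an empty page
 * table again before it is handed back.
 */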

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		pte_val(entry) |= _PAGE_INVALID;
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pte_val(entry) &= ~_PAGE_INVALID;
		pte_val(entry) |= _PAGE_PROTECT;
	}
	pgste_val(pgste) |= bit;
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}

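/*
 * ptep_shadow_pte() links a VSIE shadow page table entry (tptep) to its
 * source entry (sptep): the shadow inherits the source frame together
 * with the stricter of the two protection settings, and PGSTE_VSIE_BIT
 * is set in the source PGSTE so that invalidations of the source are
 * propagated to the shadow. It returns 1 on success, 0 if the entry
 * was already shadowed and -EAGAIN if the source is not usable (yet).
 */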
int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		pgste_val(spgste) |= PGSTE_VSIE_BIT;
		tpgste = pgste_get_lock(tptep);
		pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
				(pte_val(pte) & _PAGE_PROTECT);
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
			    pte_t *ptep)
{
	pgste_t pgste;
	pte_t pte;
	bool dirty;
	int nodat;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);
	return dirty;
}
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
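
/*
 * Usage sketch (illustrative): a dirty-logging loop on the KVM side
 * would call this once per guest page and record the result, roughly:
 *
 *	if (ptep_test_and_clear_uc(mm, addr, ptep))
 *		set_bit(gfn - base_gfn, dirty_bitmap);	(hypothetical
 *							 caller state)
 *
 * Write access is revoked again above, so the next guest write goes
 * through pgste_set_pte(), which re-sets PGSTE_UC_BIT.
 */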

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul, paddr;
	spinlock_t *ptl;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;

	pmdp = pmd_alloc_map(mm, addr);
	if (unlikely(!pmdp))
		return -EFAULT;

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return -EFAULT;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		/*
		 * Huge pmds need quiescing operations, they are
		 * always mapped.
		 */
		page_set_storage_key(paddr, key, 1);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long bits, skey;

		paddr = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(paddr);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(paddr, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);
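
/*
 * Note on the nq flag (an assumption about the SSKE performance
 * assist): when nq is set, page_set_storage_key() is called with its
 * third argument 0, which selects the nonquiescing SSKE variant and
 * skips the quiescing operation, matching the NQ control the guest
 * passed to SSKE.
 */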

/**
 * cond_set_guest_storage_key - conditionally set a guest storage key
 *				(handling csske)
 *
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the
 * guest storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);
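
/*
 * Reading of the mr/mc handling above (illustrative): the bits that
 * must match between the old and the new key for the update to be
 * skippable are collected in "mask". ACC and F always matter, R only
 * when mr is false, C only when mc is false. If old and new key agree
 * on all bits in mask, nothing is written and 0 is returned, so the
 * caller can report "key already up to date" to the guest.
 */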

/**
 * reset_guest_reference_bit - reset a guest reference bit (rrbe),
 *			       returning the reference and changed bit
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	unsigned long paddr;
	pgste_t old, new;
	pmd_t *pmdp;
	pte_t *ptep;
	int cc = 0;

	pmdp = pmd_alloc_map(mm, addr);
	if (unlikely(!pmdp))
		return -EFAULT;

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		spin_unlock(ptl);
		return -EFAULT;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		cc = page_reset_referenced(paddr);
		spin_unlock(ptl);
		return cc;
	}
	spin_unlock(ptl);

	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	pgste_val(new) &= ~PGSTE_GR_BIT;

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		paddr = pte_val(*ptep) & PAGE_MASK;
		cc = page_reset_referenced(paddr);
		/* Merge real referenced bit into host-set */
		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	unsigned long paddr;
	spinlock_t *ptl;
	pgste_t pgste;
	pmd_t *pmdp;
	pte_t *ptep;

	pmdp = pmd_alloc_map(mm, addr);
	if (unlikely(!pmdp))
		return -EFAULT;

	ptl = pmd_lock(mm, pmdp);
	if (!pmd_present(*pmdp)) {
		/* Not yet mapped memory has a zero key */
		spin_unlock(ptl);
		*key = 0;
		return 0;
	}

	if (pmd_large(*pmdp)) {
		paddr = pmd_val(*pmdp) & HPAGE_MASK;
		paddr |= addr & ~HPAGE_MASK;
		*key = page_get_storage_key(paddr);
		spin_unlock(ptl);
		return 0;
	}
	spin_unlock(ptl);

	ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	paddr = pte_val(*ptep) & PAGE_MASK;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(paddr);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);

/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *         or < 0 in case of error. -EINVAL is returned for invalid values
 *         of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
			unsigned long *oldpte, unsigned long *oldpgste)
{
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	if (WARN_ON_ONCE(orc > ESSA_MAX))
		return -EINVAL;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* We should never get here, orc was checked above */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste_val(pgste) = pgstev;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);
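
/*
 * Usage sketch (illustrative): an ESSA intercept handler on the KVM
 * side would call this once per affected guest page and collect the
 * pages to be reported in the collaborative-memory CBRL, roughly:
 *
 *	res = pgste_perform_essa(mm, hva, orc, NULL, NULL);
 *	if (res < 0)
 *		return res;
 *	if (res == 1)
 *		cbrl_add(vcpu, gfn);	(hypothetical helper)
 */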

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
			unsigned long bits, unsigned long value)
{
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	pgste_val(new) &= ~bits;
	pgste_val(new) |= value & bits;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);

/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif