linux/arch/s390/include/asm/pgtable.h
   1/*
   2 *  S390 version
   3 *    Copyright IBM Corp. 1999, 2000
   4 *    Author(s): Hartmut Penner (hp@de.ibm.com)
   5 *               Ulrich Weigand (weigand@de.ibm.com)
   6 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
   7 *
   8 *  Derived from "include/asm-i386/pgtable.h"
   9 */
  10
  11#ifndef _ASM_S390_PGTABLE_H
  12#define _ASM_S390_PGTABLE_H
  13
  14/*
  15 * The Linux memory management assumes a three-level page table setup.
  16 * For s390 64 bit we use up to four of the five levels the hardware
  17 * provides (region first tables are not used).
  18 *
  19 * The "pgd_xxx()" functions are trivial for a folded two-level
  20 * setup: the pgd is never bad, and a pmd always exists (as it's folded
  21 * into the pgd entry)
  22 *
  23 * This file contains the functions and defines necessary to modify and use
  24 * the S390 page table tree.
  25 */
  26#ifndef __ASSEMBLY__
  27#include <linux/sched.h>
  28#include <linux/mm_types.h>
  29#include <linux/page-flags.h>
  30#include <linux/radix-tree.h>
  31#include <linux/atomic.h>
  32#include <asm/bug.h>
  33#include <asm/page.h>
  34
  35extern pgd_t swapper_pg_dir[];
  36extern void paging_init(void);
  37extern void vmem_map_init(void);
  38pmd_t *vmem_pmd_alloc(void);
  39pte_t *vmem_pte_alloc(void);
  40
  41enum {
  42        PG_DIRECT_MAP_4K = 0,
  43        PG_DIRECT_MAP_1M,
  44        PG_DIRECT_MAP_2G,
  45        PG_DIRECT_MAP_MAX
  46};
  47
  48extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
  49
  50static inline void update_page_count(int level, long count)
  51{
  52        if (IS_ENABLED(CONFIG_PROC_FS))
  53                atomic_long_add(count, &direct_pages_count[level]);
  54}
  55
  56struct seq_file;
  57void arch_report_meminfo(struct seq_file *m);
  58
  59/*
  60 * The S390 doesn't have any external MMU info: the kernel page
  61 * tables contain all the necessary information.
  62 */
  63#define update_mmu_cache(vma, address, ptep)     do { } while (0)
  64#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
  65
  66/*
  67 * ZERO_PAGE is a global shared page that is always zero; used
  68 * for zero-mapped memory areas etc..
  69 */
  70
  71extern unsigned long empty_zero_page;
  72extern unsigned long zero_page_mask;
  73
  74#define ZERO_PAGE(vaddr) \
  75        (virt_to_page((void *)(empty_zero_page + \
   76         (((unsigned long)(vaddr)) & zero_page_mask))))
  77#define __HAVE_COLOR_ZERO_PAGE
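/*
 * Illustrative sketch, not part of the original header: ZERO_PAGE() picks
 * one of several contiguous zero pages so that user mappings of the zero
 * page are spread over the cache colors. Assuming a hypothetical
 * zero_page_mask of 0x3000 (four colors), two addresses one page apart
 * map to different zero pages:
 *
 *	struct page *a = ZERO_PAGE(0x1000);	// virt_to_page(empty_zero_page + 0x1000)
 *	struct page *b = ZERO_PAGE(0x2000);	// virt_to_page(empty_zero_page + 0x2000)
 */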
  78
  79/* TODO: s390 cannot support io_remap_pfn_range... */
  80#endif /* !__ASSEMBLY__ */
  81
  82/*
  83 * PMD_SHIFT determines the size of the area a second-level page
  84 * table can map
  85 * PGDIR_SHIFT determines what a third-level page table entry can map
  86 */
  87#define PMD_SHIFT       20
  88#define PUD_SHIFT       31
  89#define P4D_SHIFT       42
  90#define PGDIR_SHIFT     53
  91
  92#define PMD_SIZE        (1UL << PMD_SHIFT)
  93#define PMD_MASK        (~(PMD_SIZE-1))
  94#define PUD_SIZE        (1UL << PUD_SHIFT)
  95#define PUD_MASK        (~(PUD_SIZE-1))
  96#define P4D_SIZE        (1UL << P4D_SHIFT)
  97#define P4D_MASK        (~(P4D_SIZE-1))
  98#define PGDIR_SIZE      (1UL << PGDIR_SHIFT)
  99#define PGDIR_MASK      (~(PGDIR_SIZE-1))
 100
  101/*
  102 * entries per page directory level: on 64 bit s390 the region and
  103 * segment tables have 2048 entries each, while the lowest level
  104 * page table has 256 entries. Each pte maps a 4KB page, so one
  105 * page table covers 1MB and one segment table covers 2GB.
  106 */
 107#define PTRS_PER_PTE    256
 108#define PTRS_PER_PMD    2048
 109#define PTRS_PER_PUD    2048
 110#define PTRS_PER_P4D    2048
 111#define PTRS_PER_PGD    2048
 112
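/*
 * Editor's sketch (not in the original source): with the shifts and table
 * sizes above, a pte maps 4KB, a segment (pmd) entry 1MB, a region third
 * (pud) entry 2GB, a region second (p4d) entry 4TB and a region first
 * (pgd) entry 8PB. The index of an address at each level is obtained as
 * follows, which is exactly what the pXd_index() macros further down do:
 *
 *	(addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)	// region first index
 *	(addr >> P4D_SHIFT)   & (PTRS_PER_P4D - 1)	// region second index
 *	(addr >> PUD_SHIFT)   & (PTRS_PER_PUD - 1)	// region third index
 *	(addr >> PMD_SHIFT)   & (PTRS_PER_PMD - 1)	// segment index
 *	(addr >> PAGE_SHIFT)  & (PTRS_PER_PTE - 1)	// page table index
 */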
 113#define FIRST_USER_ADDRESS  0UL
 114
 115#define pte_ERROR(e) \
 116        printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
 117#define pmd_ERROR(e) \
 118        printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
 119#define pud_ERROR(e) \
 120        printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
 121#define p4d_ERROR(e) \
 122        printk("%s:%d: bad p4d %p.\n", __FILE__, __LINE__, (void *) p4d_val(e))
 123#define pgd_ERROR(e) \
 124        printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
 125
 126#ifndef __ASSEMBLY__
 127/*
 128 * The vmalloc and module area will always be on the topmost area of the
 129 * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
 130 * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
 131 * modules will reside. That makes sure that inter module branches always
 132 * happen without trampolines and in addition the placement within a 2GB frame
 133 * is branch prediction unit friendly.
 134 */
 135extern unsigned long VMALLOC_START;
 136extern unsigned long VMALLOC_END;
 137extern struct page *vmemmap;
 138
 139#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
 140
 141extern unsigned long MODULES_VADDR;
 142extern unsigned long MODULES_END;
 143#define MODULES_VADDR   MODULES_VADDR
 144#define MODULES_END     MODULES_END
 145#define MODULES_LEN     (1UL << 31)
 146
 147static inline int is_module_addr(void *addr)
 148{
 149        BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
 150        if (addr < (void *)MODULES_VADDR)
 151                return 0;
 152        if (addr > (void *)MODULES_END)
 153                return 0;
 154        return 1;
 155}
 156
 157/*
  158 * A 64 bit page table entry of S390 has the following format:
 159 * |                     PFRA                         |0IPC|  OS  |
 160 * 0000000000111111111122222222223333333333444444444455555555556666
 161 * 0123456789012345678901234567890123456789012345678901234567890123
 162 *
 163 * I Page-Invalid Bit:    Page is not available for address-translation
 164 * P Page-Protection Bit: Store access not possible for page
 165 * C Change-bit override: HW is not required to set change bit
 166 *
  167 * A 64 bit segment table entry of S390 has the following format:
 168 * |        P-table origin                              |      TT
 169 * 0000000000111111111122222222223333333333444444444455555555556666
 170 * 0123456789012345678901234567890123456789012345678901234567890123
 171 *
 172 * I Segment-Invalid Bit:    Segment is not available for address-translation
 173 * C Common-Segment Bit:     Segment is not private (PoP 3-30)
 174 * P Page-Protection Bit: Store access not possible for page
 175 * TT Type 00
 176 *
  177 * A 64 bit region table entry of S390 has the following format:
 178 * |        S-table origin                             |   TF  TTTL
 179 * 0000000000111111111122222222223333333333444444444455555555556666
 180 * 0123456789012345678901234567890123456789012345678901234567890123
 181 *
 182 * I Segment-Invalid Bit:    Segment is not available for address-translation
 183 * TT Type 01
 184 * TF
 185 * TL Table length
 186 *
  187 * The 64 bit region table origin of S390 has the following format:
  188 * |      region table origin                          |       DTTL
 189 * 0000000000111111111122222222223333333333444444444455555555556666
 190 * 0123456789012345678901234567890123456789012345678901234567890123
 191 *
 192 * X Space-Switch event:
 193 * G Segment-Invalid Bit:  
 194 * P Private-Space Bit:    
 195 * S Storage-Alteration:
 196 * R Real space
 197 * TL Table-Length:
 198 *
 199 * A storage key has the following format:
 200 * | ACC |F|R|C|0|
 201 *  0   3 4 5 6 7
 202 * ACC: access key
 203 * F  : fetch protection bit
 204 * R  : referenced bit
 205 * C  : changed bit
 206 */
 207
 208/* Hardware bits in the page table entry */
 209#define _PAGE_NOEXEC    0x100           /* HW no-execute bit  */
 210#define _PAGE_PROTECT   0x200           /* HW read-only bit  */
 211#define _PAGE_INVALID   0x400           /* HW invalid bit    */
 212#define _PAGE_LARGE     0x800           /* Bit to mark a large pte */
 213
 214/* Software bits in the page table entry */
 215#define _PAGE_PRESENT   0x001           /* SW pte present bit */
 216#define _PAGE_YOUNG     0x004           /* SW pte young bit */
 217#define _PAGE_DIRTY     0x008           /* SW pte dirty bit */
 218#define _PAGE_READ      0x010           /* SW pte read bit */
 219#define _PAGE_WRITE     0x020           /* SW pte write bit */
 220#define _PAGE_SPECIAL   0x040           /* SW associated with special page */
 221#define _PAGE_UNUSED    0x080           /* SW bit for pgste usage state */
 222#define __HAVE_ARCH_PTE_SPECIAL
 223
 224#ifdef CONFIG_MEM_SOFT_DIRTY
 225#define _PAGE_SOFT_DIRTY 0x002          /* SW pte soft dirty bit */
 226#else
 227#define _PAGE_SOFT_DIRTY 0x000
 228#endif
 229
 230/* Set of bits not changed in pte_modify */
 231#define _PAGE_CHG_MASK          (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
 232                                 _PAGE_YOUNG | _PAGE_SOFT_DIRTY)
 233
 234/*
 235 * handle_pte_fault uses pte_present and pte_none to find out the pte type
 236 * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
 237 * distinguish present from not-present ptes. It is changed only with the page
 238 * table lock held.
 239 *
 240 * The following table gives the different possible bit combinations for
 241 * the pte hardware and software bits in the last 12 bits of a pte
 242 * (. unassigned bit, x don't care, t swap type):
 243 *
 244 *                              842100000000
 245 *                              000084210000
 246 *                              000000008421
 247 *                              .IR.uswrdy.p
 248 * empty                        .10.00000000
 249 * swap                         .11..ttttt.0
 250 * prot-none, clean, old        .11.xx0000.1
 251 * prot-none, clean, young      .11.xx0001.1
 252 * prot-none, dirty, old        .11.xx0010.1
 253 * prot-none, dirty, young      .11.xx0011.1
 254 * read-only, clean, old        .11.xx0100.1
 255 * read-only, clean, young      .01.xx0101.1
 256 * read-only, dirty, old        .11.xx0110.1
 257 * read-only, dirty, young      .01.xx0111.1
 258 * read-write, clean, old       .11.xx1100.1
 259 * read-write, clean, young     .01.xx1101.1
 260 * read-write, dirty, old       .10.xx1110.1
 261 * read-write, dirty, young     .00.xx1111.1
 262 * HW-bits: R read-only, I invalid
 263 * SW-bits: p present, y young, d dirty, r read, w write, s special,
 264 *          u unused, l large
 265 *
 266 * pte_none    is true for the bit pattern .10.00000000, pte == 0x400
  267 * pte_swap    is true for the bit pattern .11..ttttt.0, (pte & 0x201) == 0x200
 268 * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
 269 */
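/*
 * Editor's sketch (not in the original source): a few arbitrary example
 * pte values classified according to the table above, using the query
 * functions defined further down in this file:
 *
 *	pte_none(__pte(0x400));			// 1: only _PAGE_INVALID set
 *	pte_present(__pte(0x12345011));		// 1: _PAGE_PRESENT (bit 0) set
 *	pte_swap(__pte(0x600 | (3 << 2)));	// 1: invalid + protect, not present
 */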
 270
 271/* Bits in the segment/region table address-space-control-element */
 272#define _ASCE_ORIGIN            ~0xfffUL/* segment table origin             */
 273#define _ASCE_PRIVATE_SPACE     0x100   /* private space control            */
 274#define _ASCE_ALT_EVENT         0x80    /* storage alteration event control */
 275#define _ASCE_SPACE_SWITCH      0x40    /* space switch event               */
 276#define _ASCE_REAL_SPACE        0x20    /* real space control               */
 277#define _ASCE_TYPE_MASK         0x0c    /* asce table type mask             */
 278#define _ASCE_TYPE_REGION1      0x0c    /* region first table type          */
 279#define _ASCE_TYPE_REGION2      0x08    /* region second table type         */
 280#define _ASCE_TYPE_REGION3      0x04    /* region third table type          */
 281#define _ASCE_TYPE_SEGMENT      0x00    /* segment table type               */
 282#define _ASCE_TABLE_LENGTH      0x03    /* region table length              */
 283
 284/* Bits in the region table entry */
 285#define _REGION_ENTRY_ORIGIN    ~0xfffUL/* region/segment table origin      */
 286#define _REGION_ENTRY_PROTECT   0x200   /* region protection bit            */
 287#define _REGION_ENTRY_NOEXEC    0x100   /* region no-execute bit            */
 288#define _REGION_ENTRY_OFFSET    0xc0    /* region table offset              */
 289#define _REGION_ENTRY_INVALID   0x20    /* invalid region table entry       */
 290#define _REGION_ENTRY_TYPE_MASK 0x0c    /* region/segment table type mask   */
 291#define _REGION_ENTRY_TYPE_R1   0x0c    /* region first table type          */
 292#define _REGION_ENTRY_TYPE_R2   0x08    /* region second table type         */
 293#define _REGION_ENTRY_TYPE_R3   0x04    /* region third table type          */
  294#define _REGION_ENTRY_LENGTH    0x03    /* region table length              */
 295
 296#define _REGION1_ENTRY          (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
 297#define _REGION1_ENTRY_EMPTY    (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
 298#define _REGION2_ENTRY          (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
 299#define _REGION2_ENTRY_EMPTY    (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
 300#define _REGION3_ENTRY          (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
 301#define _REGION3_ENTRY_EMPTY    (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
 302
 303#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address      */
 304#define _REGION3_ENTRY_DIRTY    0x2000  /* SW region dirty bit */
 305#define _REGION3_ENTRY_YOUNG    0x1000  /* SW region young bit */
 306#define _REGION3_ENTRY_LARGE    0x0400  /* RTTE-format control, large page  */
 307#define _REGION3_ENTRY_READ     0x0002  /* SW region read bit */
 308#define _REGION3_ENTRY_WRITE    0x0001  /* SW region write bit */
 309
 310#ifdef CONFIG_MEM_SOFT_DIRTY
 311#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */
 312#else
 313#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */
 314#endif
 315
 316#define _REGION_ENTRY_BITS       0xfffffffffffff22fUL
 317#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
 318
 319/* Bits in the segment table entry */
 320#define _SEGMENT_ENTRY_BITS     0xfffffffffffffe33UL
 321#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
 322#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address        */
 323#define _SEGMENT_ENTRY_ORIGIN   ~0x7ffUL/* segment table origin             */
 324#define _SEGMENT_ENTRY_PROTECT  0x200   /* page protection bit              */
  325#define _SEGMENT_ENTRY_NOEXEC   0x100   /* segment no-execute bit           */
 326#define _SEGMENT_ENTRY_INVALID  0x20    /* invalid segment table entry      */
 327
 328#define _SEGMENT_ENTRY          (0)
 329#define _SEGMENT_ENTRY_EMPTY    (_SEGMENT_ENTRY_INVALID)
 330
 331#define _SEGMENT_ENTRY_DIRTY    0x2000  /* SW segment dirty bit */
 332#define _SEGMENT_ENTRY_YOUNG    0x1000  /* SW segment young bit */
 333#define _SEGMENT_ENTRY_LARGE    0x0400  /* STE-format control, large page */
 334#define _SEGMENT_ENTRY_WRITE    0x0002  /* SW segment write bit */
 335#define _SEGMENT_ENTRY_READ     0x0001  /* SW segment read bit */
 336
 337#ifdef CONFIG_MEM_SOFT_DIRTY
 338#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
 339#else
 340#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
 341#endif
 342
 343/*
 344 * Segment table and region3 table entry encoding
 345 * (R = read-only, I = invalid, y = young bit):
 346 *                              dy..R...I...wr
 347 * prot-none, clean, old        00..1...1...00
 348 * prot-none, clean, young      01..1...1...00
 349 * prot-none, dirty, old        10..1...1...00
 350 * prot-none, dirty, young      11..1...1...00
 351 * read-only, clean, old        00..1...1...01
 352 * read-only, clean, young      01..1...0...01
 353 * read-only, dirty, old        10..1...1...01
 354 * read-only, dirty, young      11..1...0...01
 355 * read-write, clean, old       00..1...1...11
 356 * read-write, clean, young     01..1...0...11
 357 * read-write, dirty, old       10..0...1...11
 358 * read-write, dirty, young     11..0...0...11
 359 * The segment table origin is used to distinguish empty (origin==0) from
 360 * read-write, old segment table entries (origin!=0)
 361 * HW-bits: R read-only, I invalid
 362 * SW-bits: y young, d dirty, r read, w write
 363 */
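/*
 * Illustrative sketch, not part of the original header: this encoding is
 * what makes software dirty/young tracking work for large pmds. A clean
 * entry keeps the hardware protect bit set, so the first store faults and
 * the fault handler can mark the entry dirty (helpers defined below):
 *
 *	pmd = pmd_wrprotect(pmd);	// clear WRITE, set PROTECT
 *	pmd = pmd_mkwrite(pmd);		// set WRITE; PROTECT stays while clean
 *	pmd = pmd_mkdirty(pmd);		// set DIRTY and, as WRITE is set, clear PROTECT
 */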
 364
 365/* Page status table bits for virtualization */
 366#define PGSTE_ACC_BITS  0xf000000000000000UL
 367#define PGSTE_FP_BIT    0x0800000000000000UL
 368#define PGSTE_PCL_BIT   0x0080000000000000UL
 369#define PGSTE_HR_BIT    0x0040000000000000UL
 370#define PGSTE_HC_BIT    0x0020000000000000UL
 371#define PGSTE_GR_BIT    0x0004000000000000UL
 372#define PGSTE_GC_BIT    0x0002000000000000UL
 373#define PGSTE_UC_BIT    0x0000800000000000UL    /* user dirty (migration) */
 374#define PGSTE_IN_BIT    0x0000400000000000UL    /* IPTE notify bit */
 375#define PGSTE_VSIE_BIT  0x0000200000000000UL    /* ref'd in a shadow table */
 376
 377/* Guest Page State used for virtualization */
 378#define _PGSTE_GPS_ZERO                 0x0000000080000000UL
 379#define _PGSTE_GPS_USAGE_MASK           0x0000000003000000UL
 380#define _PGSTE_GPS_USAGE_STABLE         0x0000000000000000UL
 381#define _PGSTE_GPS_USAGE_UNUSED         0x0000000001000000UL
 382#define _PGSTE_GPS_USAGE_POT_VOLATILE   0x0000000002000000UL
 383#define _PGSTE_GPS_USAGE_VOLATILE       _PGSTE_GPS_USAGE_MASK
 384
 385/*
 386 * A user page table pointer has the space-switch-event bit, the
 387 * private-space-control bit and the storage-alteration-event-control
 388 * bit set. A kernel page table pointer doesn't need them.
 389 */
 390#define _ASCE_USER_BITS         (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
 391                                 _ASCE_ALT_EVENT)
 392
 393/*
 394 * Page protection definitions.
 395 */
 396#define PAGE_NONE       __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT)
 397#define PAGE_RO         __pgprot(_PAGE_PRESENT | _PAGE_READ | \
 398                                 _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
 399#define PAGE_RX         __pgprot(_PAGE_PRESENT | _PAGE_READ | \
 400                                 _PAGE_INVALID | _PAGE_PROTECT)
 401#define PAGE_RW         __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 402                                 _PAGE_NOEXEC  | _PAGE_INVALID | _PAGE_PROTECT)
 403#define PAGE_RWX        __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 404                                 _PAGE_INVALID | _PAGE_PROTECT)
 405
 406#define PAGE_SHARED     __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 407                                 _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
 408#define PAGE_KERNEL     __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 409                                 _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC)
 410#define PAGE_KERNEL_RO  __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
 411                                 _PAGE_PROTECT | _PAGE_NOEXEC)
 412#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
 413                                  _PAGE_YOUNG | _PAGE_DIRTY)
 414
 415/*
 416 * On s390 the page table entry has an invalid bit and a read-only bit.
  417 * Write permission implies read permission. Without the no-execute
  418 * facility read permission also implies execute permission.
 419 */
 420         /*xwr*/
 421#define __P000  PAGE_NONE
 422#define __P001  PAGE_RO
 423#define __P010  PAGE_RO
 424#define __P011  PAGE_RO
 425#define __P100  PAGE_RX
 426#define __P101  PAGE_RX
 427#define __P110  PAGE_RX
 428#define __P111  PAGE_RX
 429
 430#define __S000  PAGE_NONE
 431#define __S001  PAGE_RO
 432#define __S010  PAGE_RW
 433#define __S011  PAGE_RW
 434#define __S100  PAGE_RX
 435#define __S101  PAGE_RX
 436#define __S110  PAGE_RWX
 437#define __S111  PAGE_RWX
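/*
 * Editor's sketch (not in the original source): generic mm code indexes
 * these __Pxxx/__Sxxx entries with the mmap protection bits, roughly via
 * vm_get_page_prot() (declared in linux/mm.h, not in this header). A
 * private writable mapping deliberately starts out read-only so the first
 * store faults and copy-on-write can run:
 *
 *	vm_get_page_prot(VM_READ | VM_WRITE);			// __P011 -> PAGE_RO
 *	vm_get_page_prot(VM_READ | VM_WRITE | VM_SHARED);	// __S011 -> PAGE_RW
 */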
 438
 439/*
 440 * Segment entry (large page) protection definitions.
 441 */
 442#define SEGMENT_NONE    __pgprot(_SEGMENT_ENTRY_INVALID | \
 443                                 _SEGMENT_ENTRY_PROTECT)
 444#define SEGMENT_RO      __pgprot(_SEGMENT_ENTRY_PROTECT | \
 445                                 _SEGMENT_ENTRY_READ | \
 446                                 _SEGMENT_ENTRY_NOEXEC)
 447#define SEGMENT_RX      __pgprot(_SEGMENT_ENTRY_PROTECT | \
 448                                 _SEGMENT_ENTRY_READ)
 449#define SEGMENT_RW      __pgprot(_SEGMENT_ENTRY_READ | \
 450                                 _SEGMENT_ENTRY_WRITE | \
 451                                 _SEGMENT_ENTRY_NOEXEC)
 452#define SEGMENT_RWX     __pgprot(_SEGMENT_ENTRY_READ | \
 453                                 _SEGMENT_ENTRY_WRITE)
 454#define SEGMENT_KERNEL  __pgprot(_SEGMENT_ENTRY |       \
 455                                 _SEGMENT_ENTRY_LARGE | \
 456                                 _SEGMENT_ENTRY_READ |  \
 457                                 _SEGMENT_ENTRY_WRITE | \
 458                                 _SEGMENT_ENTRY_YOUNG | \
 459                                 _SEGMENT_ENTRY_DIRTY | \
 460                                 _SEGMENT_ENTRY_NOEXEC)
 461#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY |     \
 462                                 _SEGMENT_ENTRY_LARGE | \
 463                                 _SEGMENT_ENTRY_READ |  \
 464                                 _SEGMENT_ENTRY_YOUNG | \
 465                                 _SEGMENT_ENTRY_PROTECT | \
 466                                 _SEGMENT_ENTRY_NOEXEC)
 467
 468/*
 469 * Region3 entry (large page) protection definitions.
 470 */
 471
 472#define REGION3_KERNEL  __pgprot(_REGION_ENTRY_TYPE_R3 | \
 473                                 _REGION3_ENTRY_LARGE |  \
 474                                 _REGION3_ENTRY_READ |   \
 475                                 _REGION3_ENTRY_WRITE |  \
 476                                 _REGION3_ENTRY_YOUNG |  \
 477                                 _REGION3_ENTRY_DIRTY | \
 478                                 _REGION_ENTRY_NOEXEC)
 479#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \
 480                                   _REGION3_ENTRY_LARGE |  \
 481                                   _REGION3_ENTRY_READ |   \
 482                                   _REGION3_ENTRY_YOUNG |  \
 483                                   _REGION_ENTRY_PROTECT | \
 484                                   _REGION_ENTRY_NOEXEC)
 485
 486static inline int mm_has_pgste(struct mm_struct *mm)
 487{
 488#ifdef CONFIG_PGSTE
 489        if (unlikely(mm->context.has_pgste))
 490                return 1;
 491#endif
 492        return 0;
 493}
 494
 495static inline int mm_alloc_pgste(struct mm_struct *mm)
 496{
 497#ifdef CONFIG_PGSTE
 498        if (unlikely(mm->context.alloc_pgste))
 499                return 1;
 500#endif
 501        return 0;
 502}
 503
 504/*
  505 * In the case that a guest uses storage keys,
  506 * faults should no longer be backed by zero pages.
 507 */
 508#define mm_forbids_zeropage mm_use_skey
 509static inline int mm_use_skey(struct mm_struct *mm)
 510{
 511#ifdef CONFIG_PGSTE
 512        if (mm->context.use_skey)
 513                return 1;
 514#endif
 515        return 0;
 516}
 517
 518static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new)
 519{
 520        register unsigned long reg2 asm("2") = old;
 521        register unsigned long reg3 asm("3") = new;
 522        unsigned long address = (unsigned long)ptr | 1;
 523
 524        asm volatile(
 525                "       csp     %0,%3"
 526                : "+d" (reg2), "+m" (*ptr)
 527                : "d" (reg3), "d" (address)
 528                : "cc");
 529}
 530
 531static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new)
 532{
 533        register unsigned long reg2 asm("2") = old;
 534        register unsigned long reg3 asm("3") = new;
 535        unsigned long address = (unsigned long)ptr | 1;
 536
 537        asm volatile(
 538                "       .insn   rre,0xb98a0000,%0,%3"
 539                : "+d" (reg2), "+m" (*ptr)
 540                : "d" (reg3), "d" (address)
 541                : "cc");
 542}
 543
 544#define CRDTE_DTT_PAGE          0x00UL
 545#define CRDTE_DTT_SEGMENT       0x10UL
 546#define CRDTE_DTT_REGION3       0x14UL
 547#define CRDTE_DTT_REGION2       0x18UL
 548#define CRDTE_DTT_REGION1       0x1cUL
 549
 550static inline void crdte(unsigned long old, unsigned long new,
 551                         unsigned long table, unsigned long dtt,
 552                         unsigned long address, unsigned long asce)
 553{
 554        register unsigned long reg2 asm("2") = old;
 555        register unsigned long reg3 asm("3") = new;
 556        register unsigned long reg4 asm("4") = table | dtt;
 557        register unsigned long reg5 asm("5") = address;
 558
 559        asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0"
 560                     : "+d" (reg2)
 561                     : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce)
 562                     : "memory", "cc");
 563}
 564
 565/*
 566 * pgd/p4d/pud/pmd/pte query functions
 567 */
 568static inline int pgd_folded(pgd_t pgd)
 569{
 570        return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1;
 571}
 572
 573static inline int pgd_present(pgd_t pgd)
 574{
 575        if (pgd_folded(pgd))
 576                return 1;
 577        return (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
 578}
 579
 580static inline int pgd_none(pgd_t pgd)
 581{
 582        if (pgd_folded(pgd))
 583                return 0;
 584        return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
 585}
 586
 587static inline int pgd_bad(pgd_t pgd)
 588{
 589        /*
 590         * With dynamic page table levels the pgd can be a region table
   591         * entry or a segment table entry. Check for the bits that are
 592         * invalid for either table entry.
 593         */
 594        unsigned long mask =
 595                ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
 596                ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
 597        return (pgd_val(pgd) & mask) != 0;
 598}
 599
 600static inline int p4d_folded(p4d_t p4d)
 601{
 602        return (p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2;
 603}
 604
 605static inline int p4d_present(p4d_t p4d)
 606{
 607        if (p4d_folded(p4d))
 608                return 1;
 609        return (p4d_val(p4d) & _REGION_ENTRY_ORIGIN) != 0UL;
 610}
 611
 612static inline int p4d_none(p4d_t p4d)
 613{
 614        if (p4d_folded(p4d))
 615                return 0;
 616        return p4d_val(p4d) == _REGION2_ENTRY_EMPTY;
 617}
 618
 619static inline unsigned long p4d_pfn(p4d_t p4d)
 620{
 621        unsigned long origin_mask;
 622
 623        origin_mask = _REGION_ENTRY_ORIGIN;
 624        return (p4d_val(p4d) & origin_mask) >> PAGE_SHIFT;
 625}
 626
 627static inline int pud_folded(pud_t pud)
 628{
 629        return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3;
 630}
 631
 632static inline int pud_present(pud_t pud)
 633{
 634        if (pud_folded(pud))
 635                return 1;
 636        return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
 637}
 638
 639static inline int pud_none(pud_t pud)
 640{
 641        if (pud_folded(pud))
 642                return 0;
 643        return pud_val(pud) == _REGION3_ENTRY_EMPTY;
 644}
 645
 646static inline int pud_large(pud_t pud)
 647{
 648        if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
 649                return 0;
 650        return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
 651}
 652
 653static inline unsigned long pud_pfn(pud_t pud)
 654{
 655        unsigned long origin_mask;
 656
 657        origin_mask = _REGION_ENTRY_ORIGIN;
 658        if (pud_large(pud))
 659                origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
 660        return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
 661}
 662
 663static inline int pmd_large(pmd_t pmd)
 664{
 665        return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
 666}
 667
 668static inline int pmd_bad(pmd_t pmd)
 669{
 670        if (pmd_large(pmd))
 671                return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
 672        return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
 673}
 674
 675static inline int pud_bad(pud_t pud)
 676{
 677        if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
 678                return pmd_bad(__pmd(pud_val(pud)));
 679        if (pud_large(pud))
 680                return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
 681        return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
 682}
 683
 684static inline int p4d_bad(p4d_t p4d)
 685{
 686        if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
 687                return pud_bad(__pud(p4d_val(p4d)));
 688        return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
 689}
 690
 691static inline int pmd_present(pmd_t pmd)
 692{
 693        return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;
 694}
 695
 696static inline int pmd_none(pmd_t pmd)
 697{
 698        return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
 699}
 700
 701static inline unsigned long pmd_pfn(pmd_t pmd)
 702{
 703        unsigned long origin_mask;
 704
 705        origin_mask = _SEGMENT_ENTRY_ORIGIN;
 706        if (pmd_large(pmd))
 707                origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
 708        return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
 709}
 710
 711#define __HAVE_ARCH_PMD_WRITE
 712static inline int pmd_write(pmd_t pmd)
 713{
 714        return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
 715}
 716
 717static inline int pmd_dirty(pmd_t pmd)
 718{
 719        int dirty = 1;
 720        if (pmd_large(pmd))
 721                dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
 722        return dirty;
 723}
 724
 725static inline int pmd_young(pmd_t pmd)
 726{
 727        int young = 1;
 728        if (pmd_large(pmd))
 729                young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
 730        return young;
 731}
 732
 733static inline int pte_present(pte_t pte)
 734{
 735        /* Bit pattern: (pte & 0x001) == 0x001 */
 736        return (pte_val(pte) & _PAGE_PRESENT) != 0;
 737}
 738
 739static inline int pte_none(pte_t pte)
 740{
 741        /* Bit pattern: pte == 0x400 */
 742        return pte_val(pte) == _PAGE_INVALID;
 743}
 744
 745static inline int pte_swap(pte_t pte)
 746{
 747        /* Bit pattern: (pte & 0x201) == 0x200 */
 748        return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
 749                == _PAGE_PROTECT;
 750}
 751
 752static inline int pte_special(pte_t pte)
 753{
 754        return (pte_val(pte) & _PAGE_SPECIAL);
 755}
 756
 757#define __HAVE_ARCH_PTE_SAME
 758static inline int pte_same(pte_t a, pte_t b)
 759{
 760        return pte_val(a) == pte_val(b);
 761}
 762
 763#ifdef CONFIG_NUMA_BALANCING
 764static inline int pte_protnone(pte_t pte)
 765{
 766        return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
 767}
 768
 769static inline int pmd_protnone(pmd_t pmd)
 770{
 771        /* pmd_large(pmd) implies pmd_present(pmd) */
 772        return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
 773}
 774#endif
 775
 776static inline int pte_soft_dirty(pte_t pte)
 777{
 778        return pte_val(pte) & _PAGE_SOFT_DIRTY;
 779}
 780#define pte_swp_soft_dirty pte_soft_dirty
 781
 782static inline pte_t pte_mksoft_dirty(pte_t pte)
 783{
 784        pte_val(pte) |= _PAGE_SOFT_DIRTY;
 785        return pte;
 786}
 787#define pte_swp_mksoft_dirty pte_mksoft_dirty
 788
 789static inline pte_t pte_clear_soft_dirty(pte_t pte)
 790{
 791        pte_val(pte) &= ~_PAGE_SOFT_DIRTY;
 792        return pte;
 793}
 794#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
 795
 796static inline int pmd_soft_dirty(pmd_t pmd)
 797{
 798        return pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY;
 799}
 800
 801static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 802{
 803        pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY;
 804        return pmd;
 805}
 806
 807static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
 808{
 809        pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY;
 810        return pmd;
 811}
 812
 813/*
 814 * query functions pte_write/pte_dirty/pte_young only work if
 815 * pte_present() is true. Undefined behaviour if not..
 816 */
 817static inline int pte_write(pte_t pte)
 818{
 819        return (pte_val(pte) & _PAGE_WRITE) != 0;
 820}
 821
 822static inline int pte_dirty(pte_t pte)
 823{
 824        return (pte_val(pte) & _PAGE_DIRTY) != 0;
 825}
 826
 827static inline int pte_young(pte_t pte)
 828{
 829        return (pte_val(pte) & _PAGE_YOUNG) != 0;
 830}
 831
 832#define __HAVE_ARCH_PTE_UNUSED
 833static inline int pte_unused(pte_t pte)
 834{
 835        return pte_val(pte) & _PAGE_UNUSED;
 836}
 837
 838/*
 839 * pgd/pmd/pte modification functions
 840 */
 841
 842static inline void pgd_clear(pgd_t *pgd)
 843{
 844        if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
 845                pgd_val(*pgd) = _REGION1_ENTRY_EMPTY;
 846}
 847
 848static inline void p4d_clear(p4d_t *p4d)
 849{
 850        if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
 851                p4d_val(*p4d) = _REGION2_ENTRY_EMPTY;
 852}
 853
 854static inline void pud_clear(pud_t *pud)
 855{
 856        if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
 857                pud_val(*pud) = _REGION3_ENTRY_EMPTY;
 858}
 859
 860static inline void pmd_clear(pmd_t *pmdp)
 861{
 862        pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
 863}
 864
 865static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 866{
 867        pte_val(*ptep) = _PAGE_INVALID;
 868}
 869
 870/*
 871 * The following pte modification functions only work if
 872 * pte_present() is true. Undefined behaviour if not..
 873 */
 874static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 875{
 876        pte_val(pte) &= _PAGE_CHG_MASK;
 877        pte_val(pte) |= pgprot_val(newprot);
 878        /*
 879         * newprot for PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX
 880         * has the invalid bit set, clear it again for readable, young pages
 881         */
 882        if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
 883                pte_val(pte) &= ~_PAGE_INVALID;
 884        /*
 885         * newprot for PAGE_RO, PAGE_RX, PAGE_RW and PAGE_RWX has the page
 886         * protection bit set, clear it again for writable, dirty pages
 887         */
 888        if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
 889                pte_val(pte) &= ~_PAGE_PROTECT;
 890        return pte;
 891}
 892
 893static inline pte_t pte_wrprotect(pte_t pte)
 894{
 895        pte_val(pte) &= ~_PAGE_WRITE;
 896        pte_val(pte) |= _PAGE_PROTECT;
 897        return pte;
 898}
 899
 900static inline pte_t pte_mkwrite(pte_t pte)
 901{
 902        pte_val(pte) |= _PAGE_WRITE;
 903        if (pte_val(pte) & _PAGE_DIRTY)
 904                pte_val(pte) &= ~_PAGE_PROTECT;
 905        return pte;
 906}
 907
 908static inline pte_t pte_mkclean(pte_t pte)
 909{
 910        pte_val(pte) &= ~_PAGE_DIRTY;
 911        pte_val(pte) |= _PAGE_PROTECT;
 912        return pte;
 913}
 914
 915static inline pte_t pte_mkdirty(pte_t pte)
 916{
 917        pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY;
 918        if (pte_val(pte) & _PAGE_WRITE)
 919                pte_val(pte) &= ~_PAGE_PROTECT;
 920        return pte;
 921}
 922
 923static inline pte_t pte_mkold(pte_t pte)
 924{
 925        pte_val(pte) &= ~_PAGE_YOUNG;
 926        pte_val(pte) |= _PAGE_INVALID;
 927        return pte;
 928}
 929
 930static inline pte_t pte_mkyoung(pte_t pte)
 931{
 932        pte_val(pte) |= _PAGE_YOUNG;
 933        if (pte_val(pte) & _PAGE_READ)
 934                pte_val(pte) &= ~_PAGE_INVALID;
 935        return pte;
 936}
 937
 938static inline pte_t pte_mkspecial(pte_t pte)
 939{
 940        pte_val(pte) |= _PAGE_SPECIAL;
 941        return pte;
 942}
 943
 944#ifdef CONFIG_HUGETLB_PAGE
 945static inline pte_t pte_mkhuge(pte_t pte)
 946{
 947        pte_val(pte) |= _PAGE_LARGE;
 948        return pte;
 949}
 950#endif
 951
 952#define IPTE_GLOBAL     0
 953#define IPTE_LOCAL      1
 954
 955static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
 956{
 957        unsigned long pto = (unsigned long) ptep;
 958
 959        /* Invalidation + TLB flush for the pte */
 960        asm volatile(
 961                "       .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
 962                : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
 963                  [m4] "i" (local));
 964}
 965
 966static inline void __ptep_ipte_range(unsigned long address, int nr,
 967                                     pte_t *ptep, int local)
 968{
 969        unsigned long pto = (unsigned long) ptep;
 970
 971        /* Invalidate a range of ptes + TLB flush of the ptes */
 972        do {
 973                asm volatile(
 974                        "       .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
 975                        : [r2] "+a" (address), [r3] "+a" (nr)
 976                        : [r1] "a" (pto), [m4] "i" (local) : "memory");
 977        } while (nr != 255);
 978}
 979
 980/*
 981 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
 982 * both clear the TLB for the unmapped pte. The reason is that
 983 * ptep_get_and_clear is used in common code (e.g. change_pte_range)
 984 * to modify an active pte. The sequence is
 985 *   1) ptep_get_and_clear
 986 *   2) set_pte_at
 987 *   3) flush_tlb_range
 988 * On s390 the tlb needs to get flushed with the modification of the pte
  989 * if the pte is active. The only way this can be implemented is to
 990 * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
 991 * is a nop.
 992 */
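/*
 * Editor's sketch (not in the original source): the common code sequence
 * described above, expressed as code; pte_modify() is defined earlier in
 * this file, flush_tlb_range() comes from asm/tlbflush.h. On s390 the TLB
 * is already flushed in step 1, so step 3 does nothing:
 *
 *	pte = ptep_get_and_clear(mm, addr, ptep);	// 1) clear pte + flush TLB
 *	pte = pte_modify(pte, newprot);
 *	set_pte_at(mm, addr, ptep, pte);		// 2) install modified pte
 *	flush_tlb_range(vma, start, end);		// 3) nop on s390
 */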
 993pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
 994pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
 995
 996#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 997static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 998                                            unsigned long addr, pte_t *ptep)
 999{
1000        pte_t pte = *ptep;
1001
1002        pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
1003        return pte_young(pte);
1004}
1005
1006#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
1007static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
1008                                         unsigned long address, pte_t *ptep)
1009{
1010        return ptep_test_and_clear_young(vma, address, ptep);
1011}
1012
1013#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
1014static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
1015                                       unsigned long addr, pte_t *ptep)
1016{
1017        return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
1018}
1019
1020#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1021pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
1022void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
1023
1024#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
1025static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
1026                                     unsigned long addr, pte_t *ptep)
1027{
1028        return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
1029}
1030
1031/*
1032 * The batched pte unmap code uses ptep_get_and_clear_full to clear the
1033 * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
1034 * tlbs of an mm if it can guarantee that the ptes of the mm_struct
1035 * cannot be accessed while the batched unmap is running. In this case
1036 * full==1 and a simple pte_clear is enough. See tlb.h.
1037 */
1038#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
1039static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
1040                                            unsigned long addr,
1041                                            pte_t *ptep, int full)
1042{
1043        if (full) {
1044                pte_t pte = *ptep;
1045                *ptep = __pte(_PAGE_INVALID);
1046                return pte;
1047        }
1048        return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
1049}
1050
1051#define __HAVE_ARCH_PTEP_SET_WRPROTECT
1052static inline void ptep_set_wrprotect(struct mm_struct *mm,
1053                                      unsigned long addr, pte_t *ptep)
1054{
1055        pte_t pte = *ptep;
1056
1057        if (pte_write(pte))
1058                ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
1059}
1060
1061#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
1062static inline int ptep_set_access_flags(struct vm_area_struct *vma,
1063                                        unsigned long addr, pte_t *ptep,
1064                                        pte_t entry, int dirty)
1065{
1066        if (pte_same(*ptep, entry))
1067                return 0;
1068        ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
1069        return 1;
1070}
1071
1072/*
1073 * Additional functions to handle KVM guest page tables
1074 */
1075void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
1076                     pte_t *ptep, pte_t entry);
1077void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
1078void ptep_notify(struct mm_struct *mm, unsigned long addr,
1079                 pte_t *ptep, unsigned long bits);
1080int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr,
1081                    pte_t *ptep, int prot, unsigned long bit);
1082void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
1083                     pte_t *ptep , int reset);
1084void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
1085int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
1086                    pte_t *sptep, pte_t *tptep, pte_t pte);
1087void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
1088
1089bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
1090int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
1091                          unsigned char key, bool nq);
1092int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
1093                               unsigned char key, unsigned char *oldkey,
1094                               bool nq, bool mr, bool mc);
1095int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
1096int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
1097                          unsigned char *key);
1098
1099int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
1100                                unsigned long bits, unsigned long value);
1101int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
1102int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
1103                        unsigned long *oldpte, unsigned long *oldpgste);
1104
1105/*
1106 * Certain architectures need to do special things when PTEs
1107 * within a page table are directly modified.  Thus, the following
1108 * hook is made available.
1109 */
1110static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
1111                              pte_t *ptep, pte_t entry)
1112{
1113        if (!MACHINE_HAS_NX)
1114                pte_val(entry) &= ~_PAGE_NOEXEC;
1115        if (pte_present(entry))
1116                pte_val(entry) &= ~_PAGE_UNUSED;
1117        if (mm_has_pgste(mm))
1118                ptep_set_pte_at(mm, addr, ptep, entry);
1119        else
1120                *ptep = entry;
1121}
1122
1123/*
1124 * Conversion functions: convert a page and protection to a page entry,
1125 * and a page entry and page directory to the page they refer to.
1126 */
1127static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
1128{
1129        pte_t __pte;
1130        pte_val(__pte) = physpage + pgprot_val(pgprot);
1131        return pte_mkyoung(__pte);
1132}
1133
1134static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
1135{
1136        unsigned long physpage = page_to_phys(page);
1137        pte_t __pte = mk_pte_phys(physpage, pgprot);
1138
1139        if (pte_write(__pte) && PageDirty(page))
1140                __pte = pte_mkdirty(__pte);
1141        return __pte;
1142}
1143
1144#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
1145#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
1146#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
1147#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
1148#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
1149
1150#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
1151#define pgd_offset_k(address) pgd_offset(&init_mm, address)
1152
1153#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
1154#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
 1155#define p4d_deref(p4d) (p4d_val(p4d) & _REGION_ENTRY_ORIGIN)
1156#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
1157
1158static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
1159{
1160        p4d_t *p4d = (p4d_t *) pgd;
1161
1162        if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
1163                p4d = (p4d_t *) pgd_deref(*pgd);
1164        return p4d + p4d_index(address);
1165}
1166
1167static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
1168{
1169        pud_t *pud = (pud_t *) p4d;
1170
1171        if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
1172                pud = (pud_t *) p4d_deref(*p4d);
1173        return pud + pud_index(address);
1174}
1175
1176static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
1177{
1178        pmd_t *pmd = (pmd_t *) pud;
1179
1180        if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
1181                pmd = (pmd_t *) pud_deref(*pud);
1182        return pmd + pmd_index(address);
1183}
1184
1185#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
1186#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
1187#define pte_page(x) pfn_to_page(pte_pfn(x))
1188
1189#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
1190#define pud_page(pud) pfn_to_page(pud_pfn(pud))
 1191#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
1192
1193/* Find an entry in the lowest level page table.. */
1194#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
1195#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
1196#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
1197#define pte_unmap(pte) do { } while (0)
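/*
 * Illustrative sketch, not part of the original header: a hypothetical
 * walk of the kernel page table down to the pte of an address, using the
 * offset helpers above. Large pud/pmd entries have no lower level table,
 * so the walk stops there:
 *
 *	static pte_t *walk_kernel_pte(unsigned long addr)
 *	{
 *		pgd_t *pgd = pgd_offset_k(addr);
 *		p4d_t *p4d;
 *		pud_t *pud;
 *		pmd_t *pmd;
 *
 *		if (pgd_none(*pgd))
 *			return NULL;
 *		p4d = p4d_offset(pgd, addr);
 *		if (p4d_none(*p4d))
 *			return NULL;
 *		pud = pud_offset(p4d, addr);
 *		if (pud_none(*pud) || pud_large(*pud))
 *			return NULL;
 *		pmd = pmd_offset(pud, addr);
 *		if (pmd_none(*pmd) || pmd_large(*pmd))
 *			return NULL;
 *		return pte_offset_kernel(pmd, addr);
 *	}
 */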
1198
1199static inline pmd_t pmd_wrprotect(pmd_t pmd)
1200{
1201        pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
1202        pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1203        return pmd;
1204}
1205
1206static inline pmd_t pmd_mkwrite(pmd_t pmd)
1207{
1208        pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
1209        if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
1210                return pmd;
1211        pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
1212        return pmd;
1213}
1214
1215static inline pmd_t pmd_mkclean(pmd_t pmd)
1216{
1217        if (pmd_large(pmd)) {
1218                pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
1219                pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1220        }
1221        return pmd;
1222}
1223
1224static inline pmd_t pmd_mkdirty(pmd_t pmd)
1225{
1226        if (pmd_large(pmd)) {
1227                pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
1228                                _SEGMENT_ENTRY_SOFT_DIRTY;
1229                if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
1230                        pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
1231        }
1232        return pmd;
1233}
1234
1235static inline pud_t pud_wrprotect(pud_t pud)
1236{
1237        pud_val(pud) &= ~_REGION3_ENTRY_WRITE;
1238        pud_val(pud) |= _REGION_ENTRY_PROTECT;
1239        return pud;
1240}
1241
1242static inline pud_t pud_mkwrite(pud_t pud)
1243{
1244        pud_val(pud) |= _REGION3_ENTRY_WRITE;
1245        if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY))
1246                return pud;
1247        pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
1248        return pud;
1249}
1250
1251static inline pud_t pud_mkclean(pud_t pud)
1252{
1253        if (pud_large(pud)) {
1254                pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
1255                pud_val(pud) |= _REGION_ENTRY_PROTECT;
1256        }
1257        return pud;
1258}
1259
1260static inline pud_t pud_mkdirty(pud_t pud)
1261{
1262        if (pud_large(pud)) {
1263                pud_val(pud) |= _REGION3_ENTRY_DIRTY |
1264                                _REGION3_ENTRY_SOFT_DIRTY;
1265                if (pud_val(pud) & _REGION3_ENTRY_WRITE)
1266                        pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
1267        }
1268        return pud;
1269}
1270
1271#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
1272static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
1273{
1274        /*
1275         * pgprot is PAGE_NONE, PAGE_RO, PAGE_RX, PAGE_RW or PAGE_RWX
1276         * (see __Pxxx / __Sxxx). Convert to segment table entry format.
1277         */
1278        if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
1279                return pgprot_val(SEGMENT_NONE);
1280        if (pgprot_val(pgprot) == pgprot_val(PAGE_RO))
1281                return pgprot_val(SEGMENT_RO);
1282        if (pgprot_val(pgprot) == pgprot_val(PAGE_RX))
1283                return pgprot_val(SEGMENT_RX);
1284        if (pgprot_val(pgprot) == pgprot_val(PAGE_RW))
1285                return pgprot_val(SEGMENT_RW);
1286        return pgprot_val(SEGMENT_RWX);
1287}
1288
1289static inline pmd_t pmd_mkyoung(pmd_t pmd)
1290{
1291        if (pmd_large(pmd)) {
1292                pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
1293                if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
1294                        pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
1295        }
1296        return pmd;
1297}
1298
1299static inline pmd_t pmd_mkold(pmd_t pmd)
1300{
1301        if (pmd_large(pmd)) {
1302                pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
1303                pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
1304        }
1305        return pmd;
1306}
1307
1308static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
1309{
1310        if (pmd_large(pmd)) {
1311                pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
1312                        _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
1313                        _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
1314                pmd_val(pmd) |= massage_pgprot_pmd(newprot);
1315                if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
1316                        pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1317                if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
1318                        pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
1319                return pmd;
1320        }
1321        pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
1322        pmd_val(pmd) |= massage_pgprot_pmd(newprot);
1323        return pmd;
1324}
1325
1326static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
1327{
1328        pmd_t __pmd;
1329        pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
1330        return __pmd;
1331}
1332
1333#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
1334
1335static inline void __pmdp_csp(pmd_t *pmdp)
1336{
1337        csp((unsigned int *)pmdp + 1, pmd_val(*pmdp),
1338            pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
1339}
1340
1341#define IDTE_GLOBAL     0
1342#define IDTE_LOCAL      1
1343
1344static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
1345{
1346        unsigned long sto;
1347
1348        sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
1349        asm volatile(
1350                "       .insn   rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
1351                : "+m" (*pmdp)
1352                : [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
1353                  [m4] "i" (local)
1354                : "cc" );
1355}
1356
1357static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
1358{
1359        unsigned long r3o;
1360
1361        r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
1362        r3o |= _ASCE_TYPE_REGION3;
1363        asm volatile(
1364                "       .insn   rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
1365                : "+m" (*pudp)
1366                : [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
1367                  [m4] "i" (local)
1368                : "cc");
1369}
1370
1371pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
1372pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
1373pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t);
1374
1375#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1376
1377#define __HAVE_ARCH_PGTABLE_DEPOSIT
1378void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
1379                                pgtable_t pgtable);
1380
1381#define __HAVE_ARCH_PGTABLE_WITHDRAW
1382pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
1383
1384#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
1385static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
1386                                        unsigned long addr, pmd_t *pmdp,
1387                                        pmd_t entry, int dirty)
1388{
1389        VM_BUG_ON(addr & ~HPAGE_MASK);
1390
1391        entry = pmd_mkyoung(entry);
1392        if (dirty)
1393                entry = pmd_mkdirty(entry);
1394        if (pmd_val(*pmdp) == pmd_val(entry))
1395                return 0;
1396        pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry);
1397        return 1;
1398}
1399
1400#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
1401static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
1402                                            unsigned long addr, pmd_t *pmdp)
1403{
1404        pmd_t pmd = *pmdp;
1405
1406        pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd));
1407        return pmd_young(pmd);
1408}
1409
1410#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
1411static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
1412                                         unsigned long addr, pmd_t *pmdp)
1413{
1414        VM_BUG_ON(addr & ~HPAGE_MASK);
1415        return pmdp_test_and_clear_young(vma, addr, pmdp);
1416}
1417
1418static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
1419                              pmd_t *pmdp, pmd_t entry)
1420{
1421        if (!MACHINE_HAS_NX)
1422                pmd_val(entry) &= ~_SEGMENT_ENTRY_NOEXEC;
1423        *pmdp = entry;
1424}
1425
1426static inline pmd_t pmd_mkhuge(pmd_t pmd)
1427{
1428        pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
1429        pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
1430        pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1431        return pmd;
1432}
1433
1434#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
1435static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
1436                                            unsigned long addr, pmd_t *pmdp)
1437{
1438        return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
1439}
1440
1441#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
1442static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
1443                                                 unsigned long addr,
1444                                                 pmd_t *pmdp, int full)
1445{
1446        if (full) {
1447                pmd_t pmd = *pmdp;
1448                *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
1449                return pmd;
1450        }
1451        return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
1452}
1453
1454#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
1455static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
1456                                          unsigned long addr, pmd_t *pmdp)
1457{
1458        return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
1459}
1460
1461#define __HAVE_ARCH_PMDP_INVALIDATE
1462static inline void pmdp_invalidate(struct vm_area_struct *vma,
1463                                   unsigned long addr, pmd_t *pmdp)
1464{
1465        pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
1466}
1467
1468#define __HAVE_ARCH_PMDP_SET_WRPROTECT
1469static inline void pmdp_set_wrprotect(struct mm_struct *mm,
1470                                      unsigned long addr, pmd_t *pmdp)
1471{
1472        pmd_t pmd = *pmdp;
1473
1474        if (pmd_write(pmd))
1475                pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd));
1476}
1477
1478static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
1479                                        unsigned long address,
1480                                        pmd_t *pmdp)
1481{
1482        return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
1483}
1484#define pmdp_collapse_flush pmdp_collapse_flush
1485
1486#define pfn_pmd(pfn, pgprot)    mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
1487#define mk_pmd(page, pgprot)    pfn_pmd(page_to_pfn(page), (pgprot))
1488
1489static inline int pmd_trans_huge(pmd_t pmd)
1490{
1491        return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
1492}
1493
1494#define has_transparent_hugepage has_transparent_hugepage
1495static inline int has_transparent_hugepage(void)
1496{
1497        return MACHINE_HAS_EDAT1 ? 1 : 0;
1498}
1499#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1500
1501/*
1502 * 64 bit swap entry format:
1503 * A page-table entry has some bits we have to treat in a special way.
 1504 * Bits 52 and 55 have to be zero, otherwise a specification
1505 * exception will occur instead of a page translation exception. The
1506 * specification exception has the bad habit not to store necessary
1507 * information in the lowcore.
1508 * Bits 54 and 63 are used to indicate the page type.
1509 * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
1510 * This leaves the bits 0-51 and bits 56-62 to store type and offset.
1511 * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
1512 * for the offset.
1513 * |                      offset                        |01100|type |00|
1514 * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
1515 * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
1516 */
1517
1518#define __SWP_OFFSET_MASK       ((1UL << 52) - 1)
1519#define __SWP_OFFSET_SHIFT      12
1520#define __SWP_TYPE_MASK         ((1UL << 5) - 1)
1521#define __SWP_TYPE_SHIFT        2
1522
1523static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
1524{
1525        pte_t pte;
1526
1527        pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
1528        pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
1529        pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
1530        return pte;
1531}
1532
1533static inline unsigned long __swp_type(swp_entry_t entry)
1534{
1535        return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
1536}
1537
1538static inline unsigned long __swp_offset(swp_entry_t entry)
1539{
1540        return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
1541}
1542
1543static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
1544{
1545        return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
1546}
1547
1548#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
1549#define __swp_entry_to_pte(x)   ((pte_t) { (x).val })
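/*
 * Editor's sketch (not in the original source): a swap entry round trip
 * through the pte format described above; type and offset survive
 * unchanged and the result is recognized by pte_swap():
 *
 *	swp_entry_t entry = __swp_entry(3, 0x1234);
 *	pte_t pte = __swp_entry_to_pte(entry);
 *
 *	__swp_type(__pte_to_swp_entry(pte));	// == 3
 *	__swp_offset(__pte_to_swp_entry(pte));	// == 0x1234
 *	pte_swap(pte);				// != 0, (pte & 0x201) == 0x200
 */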
1550
1551#endif /* !__ASSEMBLY__ */
1552
1553#define kern_addr_valid(addr)   (1)
1554
1555extern int vmem_add_mapping(unsigned long start, unsigned long size);
1556extern int vmem_remove_mapping(unsigned long start, unsigned long size);
1557extern int s390_enable_sie(void);
1558extern int s390_enable_skey(void);
1559extern void s390_reset_cmma(struct mm_struct *mm);
1560
1561/* s390 has a private copy of get unmapped area to deal with cache synonyms */
1562#define HAVE_ARCH_UNMAPPED_AREA
1563#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1564
1565/*
1566 * No page table caches to initialise
1567 */
1568static inline void pgtable_cache_init(void) { }
1569static inline void check_pgt_cache(void) { }
1570
1571#include <asm-generic/pgtable.h>
1572
 1573#endif /* _ASM_S390_PGTABLE_H */
1574