linux/arch/powerpc/mm/book3s64/radix_tlb.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>
#include <linux/debugfs.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}
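
/*
 * Worked example of the encoding above (PPC_BITLSHIFT(x) == 63 - x,
 * i.e. IBM bit numbering): set = 1, is = 3 gives
 *
 *	rb = (1 << 12) | (3 << 10) = 0x1c00
 *
 * placing the set index in RB bits 44:51 and IS in RB bits 52:53, while
 * the PID is shifted into the upper word of RS (bits 0:31).
 */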

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}
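
/*
 * A minimal sketch of how this is reached, assuming the usual tlbiel_all()
 * dispatch in asm/book3s/64/tlbflush.h: a CPU being brought up does roughly
 *
 *	if (early_radix_enabled())
 *		radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
 *
 * to start with a clean local TLB.
 */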

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid_lpid(unsigned long pid,
					     unsigned long lpid,
					     unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
					    unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

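/*
 * POWER9 tlbie errata workarounds: on CPUs with CPU_FTR_P9_TLBIE_ERAT_BUG
 * an extra invalidation with PID/LPID 0 is issued, and on CPUs with
 * CPU_FTR_P9_TLBIE_STQ_BUG the previous tlbie is repeated; each extra
 * invalidation is preceded by a ptesync. The fixup_tlbie_*() helpers
 * below apply these after the "real" invalidation.
 */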
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range_lpid(unsigned long va,
					     unsigned long pid,
					     unsigned long lpid,
					     unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
				RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in HPT mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	switch (ric) {
	case RIC_FLUSH_PWC:
		/* For PWC, only one flush is needed */
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
		ppc_after_tlbiel_barrier();
		return;
	case RIC_FLUSH_TLB:
		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_ALL:
	default:
		/*
		 * Flush the first set of the TLB, and if
		 * we're doing a RIC_FLUSH_ALL, also flush
		 * the entire Page Walk Cache.
		 */
		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
				   unsigned long ric)
{
	asm volatile("ptesync" : : : "memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_pid_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
		fixup_tlbie_pid_lpid(pid, lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		fixup_tlbie_pid_lpid(pid, lpid);
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we cannot
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Work around the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
					 unsigned long pid, unsigned long lpid,
					 unsigned long page_size,
					 unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
					unsigned long pid, unsigned long lpid,
					unsigned long page_size,
					unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync" : : : "memory");
	if (also_pwc)
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
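
/*
 * A minimal sketch of how generic mm code reaches these entry points,
 * assuming the usual flush_tlb_* dispatch glue in asm/book3s/64/tlbflush.h:
 *
 *	flush_tlb_page(vma, addr);	// -> radix__flush_tlb_page()
 *	flush_tlb_mm(mm);		// -> radix__flush_tlb_mm()
 */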
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them properly when RIC = 0 for a PID/LPID
	 * invalidate.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
	unsigned long pid = mm->context.id;
	int cpu = smp_processor_id();

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_cpumask, and be in the process of kthread_use_mm when
	 * interrupted here. In that case, current->mm will be set to mm,
	 * because kthread_use_mm() setting ->mm and switching to the mm is
	 * done with interrupts off.
	 */
	if (current->mm == mm)
		goto out;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	/*
	 * This IPI may be initiated from any source including those not
	 * running the mm, so there may be a racing IPI that comes after
	 * this one which finds the cpumask already clear. Check and avoid
	 * underflowing the active_cpus count in that case. The race should
	 * not otherwise be a problem, but the TLB must be flushed because
	 * that's what the caller expects.
	 */
	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		atomic_dec(&mm->context.active_cpus);
		cpumask_clear_cpu(cpu, mm_cpumask(mm));
		always_flush = true;
	}

out:
	if (always_flush)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	exit_lazy_flush_tlb(mm, true);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm. The intent is to reduce the cost of later
 * flushes. Don't want this to be so low that it adds noticeable cost to TLB
 * flushing, or so high that it doesn't help reduce global TLBIEs.
 */
static unsigned long tlb_mm_cpumask_trim_timer = 1073;

static bool tick_and_test_trim_clock(void)
{
	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
			tlb_mm_cpumask_trim_timer) {
		__this_cpu_write(mm_cpumask_trim_clock, 0);
		return true;
	}
	return false;
}

enum tlb_flush_type {
	FLUSH_TYPE_NONE,
	FLUSH_TYPE_LOCAL,
	FLUSH_TYPE_GLOBAL,
};

static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
{
	int active_cpus = atomic_read(&mm->context.active_cpus);
	int cpu = smp_processor_id();

	if (active_cpus == 0)
		return FLUSH_TYPE_NONE;
	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		if (current->mm != mm) {
			/*
			 * Asynchronous flush sources may trim down to nothing
			 * if the process is not running, so occasionally try
			 * to trim.
			 */
			if (tick_and_test_trim_clock()) {
				exit_lazy_flush_tlb(mm, true);
				return FLUSH_TYPE_NONE;
			}
		}
		return FLUSH_TYPE_LOCAL;
	}

	/* Coprocessors require TLBIE to invalidate nMMU. */
	if (atomic_read(&mm->context.copros) > 0)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
	 * because the mm is being taken down anyway, and a TLBIE tends to
	 * be faster than an IPI+TLBIEL.
	 */
	if (fullmm)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * If we are running the only thread of a single-threaded process,
	 * then we should almost always be able to trim off the rest of the
	 * CPU mask (except in the case of use_mm() races), so always try
	 * trimming the mask.
	 */
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
		exit_flush_lazy_tlbs(mm);
		/*
		 * use_mm() race could prevent IPIs from being able to clear
		 * the cpumask here, however those users are established
		 * after our first check (and so after the PTEs are removed),
		 * and the TLB still gets flushed by the IPI, so this CPU
		 * will only require a local flush.
		 */
		return FLUSH_TYPE_LOCAL;
	}

	/*
	 * Occasionally try to trim down the cpumask. It's possible this can
	 * bring the mask to zero, which results in no flush.
	 */
	if (tick_and_test_trim_clock()) {
		exit_flush_lazy_tlbs(mm);
		if (current->mm == mm)
			return FLUSH_TYPE_LOCAL;
		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
			exit_lazy_flush_tlb(mm, true);
		return FLUSH_TYPE_NONE;
	}

	return FLUSH_TYPE_GLOBAL;
}
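
/*
 * In summary, the decision above is:
 *  - no active CPUs                  -> FLUSH_TYPE_NONE
 *  - only this CPU in mm_cpumask     -> FLUSH_TYPE_LOCAL
 *  - coprocessors attached (nMMU)    -> FLUSH_TYPE_GLOBAL
 *  - fullmm teardown                 -> FLUSH_TYPE_GLOBAL
 *  - sole thread of the mm           -> trim remote CPUs, then LOCAL
 *  - otherwise                       -> FLUSH_TYPE_GLOBAL, with an
 *    occasional attempt to trim the cpumask
 */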

#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
	 * stores to clear ptes before the invalidate. See barrier in
	 * switch_mm_irqs_off.
	 */
	smp_mb();
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;
			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					     H_RPTI_TYPE_PRT;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (!mmu_has_feature(MMU_FTR_GTSE)) {
		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
				     H_RPTI_TYPE_PRT;

		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
				       start, end);
	} else if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
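
/*
 * Worked example with 64K pages: flushing 2MB is 32 pages, under the
 * global ceiling of 33, so it is done as individual tlbies; 4MB is 64
 * pages, over the ceiling, so it becomes a full PID flush. The local
 * ceiling is POWER9_TLB_SETS_RADIX * 2 = 256 pages, since on POWER9 a
 * local full-PID tlbiel has to iterate all 128 sets anyway.
 */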

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid, flush_pwc = false;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
	/*
	 * A full PID flush already includes a PWC flush. If this is not a
	 * full PID flush and the range spans at least a PMD, force a PWC
	 * flush; mremap() depends on this behaviour.
	 */
	if (!flush_pid && (end - start) >= PMD_SIZE)
		flush_pwc = true;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		if (flush_pwc)
			type |= H_RPTI_TYPE_PWC;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (flush_pid) {
		/*
		 * The range is large enough that we are flushing the whole
		 * PID, which already takes out the PWC, so use RIC_FLUSH_ALL.
		 */
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
		} else {
			if (cputlb_use_tlbie()) {
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart < hend)
				hflush = true;
		}

		if (type == FLUSH_TYPE_LOCAL) {
			asm volatile("ptesync": : :"memory");
			if (flush_pwc)
				/* For PWC, only one flush is needed */
				__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			ppc_after_tlbiel_barrier();
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			if (flush_pwc)
				__tlbie_pid(pid, RIC_FLUSH_PWC);
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
		}
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
					unsigned long addr,
					unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * If the page size is not something we understand, do a full mm flush.
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

		if (also_pwc)
			type |= H_RPTI_TYPE_PWC;
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (type == FLUSH_TYPE_LOCAL)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
				      unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, type, pg_sizes;

			tgt = H_RPTI_TARGET_CMMU;
			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
			       H_RPTI_TYPE_PRT;
			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
					       addr, end);
		} else if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * Now flush guest entries by passing PRS = 1 and LPID != 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * Now flush host entries by passing PRS = 0 and LPID == 0.
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * Performs process-scoped invalidations for a given LPID
 * as part of the H_RPT_INVALIDATE hcall.
 */
void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
			     unsigned long type, unsigned long pg_sizes,
			     unsigned long start, unsigned long end)
{
	unsigned long psize, nr_pages;
	struct mmu_psize_def *def;
	bool flush_pid;

	/*
	 * An H_RPTI_TYPE_ALL request implies RIC=3, hence
	 * do a single IS=1 based flush.
	 */
	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		return;
	}

	if (type & H_RPTI_TYPE_PWC)
		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);

	/* Full PID flush */
	if (start == 0 && end == -1)
		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);

	/* Do range invalidation for all the valid page sizes */
	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		def = &mmu_psize_defs[psize];
		if (!(pg_sizes & def->h_rpt_pgsize))
			continue;

		nr_pages = (end - start) >> def->shift;
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;

		/*
		 * If the number of pages spanning the range is above
		 * the ceiling, convert the request into a full PID flush.
		 * And since a PID flush takes out all the page sizes, there
		 * is no need to consider the remaining page sizes.
		 */
		if (flush_pid) {
			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
			return;
		}
		_tlbie_va_range_lpid(start, end, pid, lpid,
				     (1UL << def->shift), psize, false);
	}
}
EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
			   arch_debugfs_dir, &tlb_single_page_flush_ceiling);
	debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
			   arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
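
/*
 * With the standard powerpc debugfs layout (arch_debugfs_dir being the
 * "powerpc" directory), these show up as, e.g.,
 * /sys/kernel/debug/powerpc/tlb_single_page_flush_ceiling, and can be
 * tuned at runtime:
 *
 *	echo 64 > /sys/kernel/debug/powerpc/tlb_single_page_flush_ceiling
 */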