linux/arch/um/kernel/tlb.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

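/*
 * A host_vm_change collects the mmap/munmap/mprotect operations that have
 * to be issued against the host address space, so that adjacent, compatible
 * operations can be merged before do_ops() carries them out.
 */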
struct host_vm_change {
        struct host_vm_op {
                enum { NONE, MMAP, MUNMAP, MPROTECT } type;
                union {
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                                int fd;
                                __u64 offset;
                        } mmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                        } munmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                        } mprotect;
                } u;
        } ops[1];
        int userspace;
        int index;
        struct mm_struct *mm;
        void *data;
        int force;
};

#define INIT_HVC(mm, force, userspace) \
        ((struct host_vm_change) \
         { .ops         = { { .type = NONE } }, \
           .mm          = mm, \
           .data        = NULL, \
           .userspace   = userspace, \
           .index       = 0, \
           .force       = force })

static void report_enomem(void)
{
        printk(KERN_ERR "UML ran out of memory on the host side! "
                        "This can happen due to a memory limitation or "
                        "because vm.max_map_count has been reached.\n");
}

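/*
 * Flush the queued operations to the host: for a userspace address space
 * they go through the skas map/unmap/protect interface, for the kernel's
 * own mappings they are applied directly with the os_* helpers.
 */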
static int do_ops(struct host_vm_change *hvc, int end,
                  int finished)
{
        struct host_vm_op *op;
        int i, ret = 0;

        for (i = 0; i < end && !ret; i++) {
                op = &hvc->ops[i];
                switch (op->type) {
                case MMAP:
                        if (hvc->userspace)
                                ret = map(&hvc->mm->context.id, op->u.mmap.addr,
                                          op->u.mmap.len, op->u.mmap.prot,
                                          op->u.mmap.fd,
                                          op->u.mmap.offset, finished,
                                          &hvc->data);
                        else
                                map_memory(op->u.mmap.addr, op->u.mmap.offset,
                                           op->u.mmap.len, 1, 1, 1);
                        break;
                case MUNMAP:
                        if (hvc->userspace)
                                ret = unmap(&hvc->mm->context.id,
                                            op->u.munmap.addr,
                                            op->u.munmap.len, finished,
                                            &hvc->data);
                        else
                                ret = os_unmap_memory(
                                        (void *) op->u.munmap.addr,
                                                      op->u.munmap.len);

                        break;
                case MPROTECT:
                        if (hvc->userspace)
                                ret = protect(&hvc->mm->context.id,
                                              op->u.mprotect.addr,
                                              op->u.mprotect.len,
                                              op->u.mprotect.prot,
                                              finished, &hvc->data);
                        else
                                ret = os_protect_memory(
                                        (void *) op->u.mprotect.addr,
                                                        op->u.mprotect.len,
                                                        1, 1, 1);
                        break;
                default:
                        printk(KERN_ERR "Unknown op type %d in do_ops\n",
                               op->type);
                        BUG();
                        break;
                }
        }

        if (ret == -ENOMEM)
                report_enomem();

        return ret;
}

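/*
 * The add_* helpers queue a single operation, merging it with the previous
 * entry when the two are contiguous and compatible, and flush the queue
 * with do_ops() once it is full.
 */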
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
                    unsigned int prot, struct host_vm_change *hvc)
{
        __u64 offset;
        struct host_vm_op *last;
        int fd = -1, ret = 0;

        if (hvc->userspace)
                fd = phys_mapping(phys, &offset);
        else
                offset = phys;
        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MMAP) &&
                   (last->u.mmap.addr + last->u.mmap.len == virt) &&
                   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
                   (last->u.mmap.offset + last->u.mmap.len == offset)) {
                        last->u.mmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MMAP,
                                    .u = { .mmap = { .addr      = virt,
                                                     .len       = len,
                                                     .prot      = prot,
                                                     .fd        = fd,
                                                     .offset    = offset }
                           } });
        return ret;
}

static int add_munmap(unsigned long addr, unsigned long len,
                      struct host_vm_change *hvc)
{
        struct host_vm_op *last;
        int ret = 0;

        if ((addr >= STUB_START) && (addr < STUB_END))
                return -EINVAL;

        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MUNMAP) &&
                   (last->u.munmap.addr + last->u.munmap.len == addr)) {
                        last->u.munmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MUNMAP,
                                    .u = { .munmap = { .addr    = addr,
                                                       .len     = len } } });
        return ret;
}

 189
 190static int add_mprotect(unsigned long addr, unsigned long len,
 191                        unsigned int prot, struct host_vm_change *hvc)
 192{
 193        struct host_vm_op *last;
 194        int ret = 0;
 195
 196        if (hvc->index != 0) {
 197                last = &hvc->ops[hvc->index - 1];
 198                if ((last->type == MPROTECT) &&
 199                   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
 200                   (last->u.mprotect.prot == prot)) {
 201                        last->u.mprotect.len += len;
 202                        return 0;
 203                }
 204        }
 205
 206        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
 207                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
 208                hvc->index = 0;
 209        }
 210
 211        hvc->ops[hvc->index++] = ((struct host_vm_op)
 212                                  { .type       = MPROTECT,
 213                                    .u = { .mprotect = { .addr  = addr,
 214                                                         .len   = len,
 215                                                         .prot  = prot } } });
 216        return ret;
 217}
 218
 219#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
 220
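/*
 * The update_*_range walkers translate the software bits kept in the page
 * tables (new page, new protection, accessed, dirty) into host operations:
 * a freshly present PTE becomes an mmap, a missing one a munmap, and a
 * protection change an mprotect.
 */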
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pte_t *pte;
        int r, w, x, prot, ret = 0;

        pte = pte_offset_kernel(pmd, addr);
        do {
                if ((addr >= STUB_START) && (addr < STUB_END))
                        continue;

                r = pte_read(*pte);
                w = pte_write(*pte);
                x = pte_exec(*pte);
                if (!pte_young(*pte)) {
                        r = 0;
                        w = 0;
                } else if (!pte_dirty(*pte))
                        w = 0;

                prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                        (x ? UM_PROT_EXEC : 0));
                if (hvc->force || pte_newpage(*pte)) {
                        if (pte_present(*pte)) {
                                if (pte_newpage(*pte))
                                        ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
                                                       PAGE_SIZE, prot, hvc);
                        } else
                                ret = add_munmap(addr, PAGE_SIZE, hvc);
                } else if (pte_newprot(*pte))
                        ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
                *pte = pte_mkuptodate(*pte);
        } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
        return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pmd_t *pmd;
        unsigned long next;
        int ret = 0;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (!pmd_present(*pmd)) {
                        if (hvc->force || pmd_newpage(*pmd)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pmd_mkuptodate(*pmd);
                        }
                }
                else ret = update_pte_range(pmd, addr, next, hvc);
        } while (pmd++, addr = next, ((addr < end) && !ret));
        return ret;
}

static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pud_t *pud;
        unsigned long next;
        int ret = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (!pud_present(*pud)) {
                        if (hvc->force || pud_newpage(*pud)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pud_mkuptodate(*pud);
                        }
                }
                else ret = update_pmd_range(pud, addr, next, hvc);
        } while (pud++, addr = next, ((addr < end) && !ret));
        return ret;
}

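/*
 * Walk the page tables for [start_addr, end_addr) and bring the host
 * mappings of a userspace address space in line with them.  On failure
 * the current process is killed, since its host mappings are no longer
 * consistent with its page tables.
 */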
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        pgd_t *pgd;
        struct host_vm_change hvc;
        unsigned long addr = start_addr, next;
        int ret = 0, userspace = 1;

        hvc = INIT_HVC(mm, force, userspace);
        pgd = pgd_offset(mm, addr);
        do {
                next = pgd_addr_end(addr, end_addr);
                if (!pgd_present(*pgd)) {
                        if (force || pgd_newpage(*pgd)) {
                                ret = add_munmap(addr, next - addr, &hvc);
                                pgd_mkuptodate(*pgd);
                        }
                }
                else ret = update_pud_range(pgd, addr, next, &hvc);
        } while (pgd++, addr = next, ((addr < end_addr) && !ret));

        if (!ret)
                ret = do_ops(&hvc, hvc.index, 1);

        /* This is not an else because ret is modified above */
        if (ret) {
                printk(KERN_ERR "fix_range_common: failed, killing current "
                       "process: %d\n", task_tgid_vnr(current));
                /* We are under mmap_sem, release it such that current can terminate */
                up_write(&current->mm->mmap_sem);
                force_sig(SIGKILL);
                do_signal(&current->thread.regs);
        }
}

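/*
 * Same idea for the kernel's own mappings in init_mm: walk the page tables
 * and unmap, remap or reprotect the corresponding host pages.  Returns
 * nonzero if anything was changed.
 */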
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
        struct mm_struct *mm;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long addr, last;
        int updated = 0, err = 0, force = 0, userspace = 0;
        struct host_vm_change hvc;

        mm = &init_mm;
        hvc = INIT_HVC(mm, force, userspace);
        for (addr = start; addr < end;) {
                pgd = pgd_offset(mm, addr);
                if (!pgd_present(*pgd)) {
                        last = ADD_ROUND(addr, PGDIR_SIZE);
                        if (last > end)
                                last = end;
                        if (pgd_newpage(*pgd)) {
                                updated = 1;
                                err = add_munmap(addr, last - addr, &hvc);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pud = pud_offset(pgd, addr);
                if (!pud_present(*pud)) {
                        last = ADD_ROUND(addr, PUD_SIZE);
                        if (last > end)
                                last = end;
                        if (pud_newpage(*pud)) {
                                updated = 1;
                                err = add_munmap(addr, last - addr, &hvc);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pmd = pmd_offset(pud, addr);
                if (!pmd_present(*pmd)) {
                        last = ADD_ROUND(addr, PMD_SIZE);
                        if (last > end)
                                last = end;
                        if (pmd_newpage(*pmd)) {
                                updated = 1;
                                err = add_munmap(addr, last - addr, &hvc);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pte = pte_offset_kernel(pmd, addr);
                if (!pte_present(*pte) || pte_newpage(*pte)) {
                        updated = 1;
                        err = add_munmap(addr, PAGE_SIZE, &hvc);
                        if (err < 0)
                                panic("munmap failed, errno = %d\n",
                                      -err);
                        if (pte_present(*pte))
                                err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
                                               PAGE_SIZE, 0, &hvc);
                }
                else if (pte_newprot(*pte)) {
                        updated = 1;
                        err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
                }
                addr += PAGE_SIZE;
        }
        if (!err)
                err = do_ops(&hvc, hvc.index, 1);

        if (err < 0)
                panic("flush_tlb_kernel failed, errno = %d\n", err);
        return updated;
}

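/*
 * Flush a single userspace page: look up its PTE and issue the matching
 * map/unmap/protect call immediately rather than queueing it.
 */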
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        struct mm_struct *mm = vma->vm_mm;
        void *flush = NULL;
        int r, w, x, prot, err = 0;
        struct mm_id *mm_id;

        address &= PAGE_MASK;
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                goto kill;

        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
                goto kill;

        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                goto kill;

        pte = pte_offset_kernel(pmd, address);

        r = pte_read(*pte);
        w = pte_write(*pte);
        x = pte_exec(*pte);
        if (!pte_young(*pte)) {
                r = 0;
                w = 0;
        } else if (!pte_dirty(*pte)) {
                w = 0;
        }

        mm_id = &mm->context.id;
        prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                (x ? UM_PROT_EXEC : 0));
        if (pte_newpage(*pte)) {
                if (pte_present(*pte)) {
                        unsigned long long offset;
                        int fd;

                        fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
                        err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
                                  1, &flush);
                }
                else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
        }
        else if (pte_newprot(*pte))
                err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

        if (err) {
                if (err == -ENOMEM)
                        report_enomem();

                goto kill;
        }

        *pte = pte_mkuptodate(*pte);

        return;

kill:
        printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
        force_sig(SIGKILL);
}

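/*
 * Out-of-line wrappers around the page table accessors, for callers that
 * cannot use the inline versions directly.
 */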
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
        return pgd_offset(mm, address);
}

pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
        return pud_offset(pgd, address);
}

pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
        return pmd_offset(pud, address);
}

pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
        return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(task->mm, addr);
        pud_t *pud = pud_offset(pgd, addr);
        pmd_t *pmd = pmd_offset(pud, addr);

        return pte_offset_map(pmd, addr);
}

void flush_tlb_all(void)
{
        /*
         * Don't bother flushing if this address space is about to be
         * destroyed.
         */
        if (atomic_read(&current->mm->mm_users) == 0)
                return;

        flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
        flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
        flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        /*
         * Don't bother flushing if this address space is about to be
         * destroyed.
         */
        if (atomic_read(&mm->mm_users) == 0)
                return;

        fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
{
        if (vma->vm_mm == NULL)
                flush_tlb_kernel_range_common(start, end);
        else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end)
{
        fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 0);
                vma = vma->vm_next;
        }
}

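/*
 * Resync the host mappings for every VMA of the current process, forcing
 * the operations even where the page tables look up to date.
 */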
void force_flush_all(void)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 1);
                vma = vma->vm_next;
        }
}