linux/arch/um/kernel/tlb.c
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

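/*
 * A host_vm_change batches pending address space changes (mmap, munmap,
 * mprotect) before they are pushed out to the host.  ops[] currently has a
 * single slot, so adding an operation that cannot be merged with the
 * previous one first flushes that previous entry; adjacent compatible
 * operations are still coalesced by the add_* helpers below to cut down on
 * host system calls.
 */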
struct host_vm_change {
        struct host_vm_op {
                enum { NONE, MMAP, MUNMAP, MPROTECT } type;
                union {
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                                int fd;
                                __u64 offset;
                        } mmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                        } munmap;
                        struct {
                                unsigned long addr;
                                unsigned long len;
                                unsigned int prot;
                        } mprotect;
                } u;
        } ops[1];
        int index;
        struct mm_id *id;
        void *data;
        int force;
};

#define INIT_HVC(mm, force) \
        ((struct host_vm_change) \
         { .ops         = { { .type = NONE } }, \
           .id          = &mm->context.id, \
           .data        = NULL, \
           .index       = 0, \
           .force       = force })

static void report_enomem(void)
{
        printk(KERN_ERR "UML ran out of memory on the host side! "
                        "This can happen due to a memory limitation or "
                        "because vm.max_map_count has been reached.\n");
}

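/*
 * Push the queued operations out to the host address space identified by
 * hvc->id, using the skas map/unmap/protect primitives.  The "finished"
 * flag is forwarded to those primitives; callers pass 1 only on the final
 * flush of a walk and 0 for intermediate flushes triggered by a full ops[]
 * array.
 */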
static int do_ops(struct host_vm_change *hvc, int end,
                  int finished)
{
        struct host_vm_op *op;
        int i, ret = 0;

        for (i = 0; i < end && !ret; i++) {
                op = &hvc->ops[i];
                switch (op->type) {
                case MMAP:
                        ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
                                  op->u.mmap.prot, op->u.mmap.fd,
                                  op->u.mmap.offset, finished, &hvc->data);
                        break;
                case MUNMAP:
                        ret = unmap(hvc->id, op->u.munmap.addr,
                                    op->u.munmap.len, finished, &hvc->data);
                        break;
                case MPROTECT:
                        ret = protect(hvc->id, op->u.mprotect.addr,
                                      op->u.mprotect.len, op->u.mprotect.prot,
                                      finished, &hvc->data);
                        break;
                default:
                        printk(KERN_ERR "Unknown op type %d in do_ops\n",
                               op->type);
                        BUG();
                        break;
                }
        }

        if (ret == -ENOMEM)
                report_enomem();

        return ret;
}

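/*
 * Queue a host mmap of the file/offset backing "phys" at virtual address
 * "virt".  If the previous queued operation is an mmap that this one
 * extends contiguously (same fd and protection, adjacent address and
 * offset), the existing entry is simply grown.  A full ops[] array is
 * flushed with do_ops() before the new entry is stored.
 */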
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
                    unsigned int prot, struct host_vm_change *hvc)
{
        __u64 offset;
        struct host_vm_op *last;
        int fd, ret = 0;

        fd = phys_mapping(phys, &offset);
        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MMAP) &&
                   (last->u.mmap.addr + last->u.mmap.len == virt) &&
                   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
                   (last->u.mmap.offset + last->u.mmap.len == offset)) {
                        last->u.mmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MMAP,
                                    .u = { .mmap = { .addr      = virt,
                                                     .len       = len,
                                                     .prot      = prot,
                                                     .fd        = fd,
                                                     .offset    = offset }
                           } });
        return ret;
}

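/*
 * Queue a host munmap, merging with an immediately preceding munmap when
 * the two ranges are adjacent.  Requests that start inside the stub area
 * are rejected with -EINVAL, since the stub pages have to stay mapped in
 * the host process.
 */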
static int add_munmap(unsigned long addr, unsigned long len,
                      struct host_vm_change *hvc)
{
        struct host_vm_op *last;
        int ret = 0;

        if ((addr >= STUB_START) && (addr < STUB_END))
                return -EINVAL;

        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MUNMAP) &&
                   (last->u.munmap.addr + last->u.munmap.len == addr)) {
                        last->u.munmap.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MUNMAP,
                                    .u = { .munmap = { .addr    = addr,
                                                       .len     = len } } });
        return ret;
}

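/*
 * Queue a host mprotect, merging with an adjacent preceding mprotect that
 * carries the same protection bits.
 */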
static int add_mprotect(unsigned long addr, unsigned long len,
                        unsigned int prot, struct host_vm_change *hvc)
{
        struct host_vm_op *last;
        int ret = 0;

        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MPROTECT) &&
                   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
                   (last->u.mprotect.prot == prot)) {
                        last->u.mprotect.len += len;
                        return 0;
                }
        }

        if (hvc->index == ARRAY_SIZE(hvc->ops)) {
                ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
                hvc->index = 0;
        }

        hvc->ops[hvc->index++] = ((struct host_vm_op)
                                  { .type       = MPROTECT,
                                    .u = { .mprotect = { .addr  = addr,
                                                         .len   = len,
                                                         .prot  = prot } } });
        return ret;
}

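/*
 * Advance "n" to the next "inc"-aligned boundary ("inc" must be a power of
 * two); if "n" is already aligned, this moves to the following boundary.
 */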
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

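/*
 * Walk the PTEs in [addr, end) and queue whatever host operations are
 * needed to match them: newly present pages are mapped, no longer present
 * pages are unmapped, and pure protection changes become mprotects.
 * Pages that are not young lose read/write access and clean pages lose
 * write access, so the next touch faults and lets the accessed/dirty bits
 * be simulated.  The stub area is skipped.
 */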
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pte_t *pte;
        int r, w, x, prot, ret = 0;

        pte = pte_offset_kernel(pmd, addr);
        do {
                if ((addr >= STUB_START) && (addr < STUB_END))
                        continue;

                r = pte_read(*pte);
                w = pte_write(*pte);
                x = pte_exec(*pte);
                if (!pte_young(*pte)) {
                        r = 0;
                        w = 0;
                } else if (!pte_dirty(*pte))
                        w = 0;

                prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                        (x ? UM_PROT_EXEC : 0));
                if (hvc->force || pte_newpage(*pte)) {
                        if (pte_present(*pte))
                                ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
                                               PAGE_SIZE, prot, hvc);
                        else
                                ret = add_munmap(addr, PAGE_SIZE, hvc);
                } else if (pte_newprot(*pte))
                        ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
                *pte = pte_mkuptodate(*pte);
        } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
        return ret;
}

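/*
 * Walk the PMDs in [addr, end): ranges whose PMD is absent are unmapped
 * wholesale when forced or marked new, present ones are handed down to
 * update_pte_range().
 */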
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pmd_t *pmd;
        unsigned long next;
        int ret = 0;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (!pmd_present(*pmd)) {
                        if (hvc->force || pmd_newpage(*pmd)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pmd_mkuptodate(*pmd);
                        }
                }
                else ret = update_pte_range(pmd, addr, next, hvc);
        } while (pmd++, addr = next, ((addr < end) && !ret));
        return ret;
}

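/* Same as update_pmd_range(), one level further up the page table tree. */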
static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
                                   unsigned long end,
                                   struct host_vm_change *hvc)
{
        pud_t *pud;
        unsigned long next;
        int ret = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (!pud_present(*pud)) {
                        if (hvc->force || pud_newpage(*pud)) {
                                ret = add_munmap(addr, next - addr, hvc);
                                pud_mkuptodate(*pud);
                        }
                }
                else ret = update_pmd_range(pud, addr, next, hvc);
        } while (pud++, addr = next, ((addr < end) && !ret));
        return ret;
}

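/*
 * Bring the host mappings for [start_addr, end_addr) of "mm" back in sync
 * with its page tables.  Changes are batched in a host_vm_change and
 * flushed with do_ops(); if anything fails, the host address space can no
 * longer be trusted, so the current process is killed.
 */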
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        pgd_t *pgd;
        struct host_vm_change hvc;
        unsigned long addr = start_addr, next;
        int ret = 0;

        hvc = INIT_HVC(mm, force);
        pgd = pgd_offset(mm, addr);
        do {
                next = pgd_addr_end(addr, end_addr);
                if (!pgd_present(*pgd)) {
                        if (force || pgd_newpage(*pgd)) {
                                ret = add_munmap(addr, next - addr, &hvc);
                                pgd_mkuptodate(*pgd);
                        }
                }
                else ret = update_pud_range(pgd, addr, next, &hvc);
        } while (pgd++, addr = next, ((addr < end_addr) && !ret));

        if (!ret)
                ret = do_ops(&hvc, hvc.index, 1);

        /* This is not an else because ret is modified above */
        if (ret) {
                printk(KERN_ERR "fix_range_common: failed, killing current "
                       "process: %d\n", task_tgid_vnr(current));
                /* We are under mmap_sem, release it such that current can terminate */
                up_write(&current->mm->mmap_sem);
                force_sig(SIGKILL, current);
                do_signal(&current->thread.regs);
        }
}

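/*
 * Kernel-space counterpart of fix_range_common(): walk init_mm's page
 * tables for [start, end) and fix the host mappings directly with
 * os_unmap_memory()/map_memory()/os_protect_memory(), since kernel pages
 * live in the UML process itself rather than in a userspace child.
 * Returns nonzero if anything was changed.
 */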
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
        struct mm_struct *mm;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long addr, last;
        int updated = 0, err;

        mm = &init_mm;
        for (addr = start; addr < end;) {
                pgd = pgd_offset(mm, addr);
                if (!pgd_present(*pgd)) {
                        last = ADD_ROUND(addr, PGDIR_SIZE);
                        if (last > end)
                                last = end;
                        if (pgd_newpage(*pgd)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pud = pud_offset(pgd, addr);
                if (!pud_present(*pud)) {
                        last = ADD_ROUND(addr, PUD_SIZE);
                        if (last > end)
                                last = end;
                        if (pud_newpage(*pud)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pmd = pmd_offset(pud, addr);
                if (!pmd_present(*pmd)) {
                        last = ADD_ROUND(addr, PMD_SIZE);
                        if (last > end)
                                last = end;
                        if (pmd_newpage(*pmd)) {
                                updated = 1;
                                err = os_unmap_memory((void *) addr,
                                                      last - addr);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
                        }
                        addr = last;
                        continue;
                }

                pte = pte_offset_kernel(pmd, addr);
                if (!pte_present(*pte) || pte_newpage(*pte)) {
                        updated = 1;
                        err = os_unmap_memory((void *) addr,
                                              PAGE_SIZE);
                        if (err < 0)
                                panic("munmap failed, errno = %d\n",
                                      -err);
                        if (pte_present(*pte))
                                map_memory(addr,
                                           pte_val(*pte) & PAGE_MASK,
                                           PAGE_SIZE, 1, 1, 1);
                }
                else if (pte_newprot(*pte)) {
                        updated = 1;
                        os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
                }
                addr += PAGE_SIZE;
        }
        return updated;
}

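/*
 * Fix the host mapping of a single user page, walking the page tables by
 * hand and issuing the map/unmap/protect call directly instead of going
 * through a host_vm_change batch.  As in fix_range_common(), failure is
 * fatal for the process.
 */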
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        struct mm_struct *mm = vma->vm_mm;
        void *flush = NULL;
        int r, w, x, prot, err = 0;
        struct mm_id *mm_id;

        address &= PAGE_MASK;
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                goto kill;

        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
                goto kill;

        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                goto kill;

        pte = pte_offset_kernel(pmd, address);

        r = pte_read(*pte);
        w = pte_write(*pte);
        x = pte_exec(*pte);
        if (!pte_young(*pte)) {
                r = 0;
                w = 0;
        } else if (!pte_dirty(*pte)) {
                w = 0;
        }

        mm_id = &mm->context.id;
        prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                (x ? UM_PROT_EXEC : 0));
        if (pte_newpage(*pte)) {
                if (pte_present(*pte)) {
                        unsigned long long offset;
                        int fd;

                        fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
                        err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
                                  1, &flush);
                }
                else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
        }
        else if (pte_newprot(*pte))
                err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

        if (err) {
                if (err == -ENOMEM)
                        report_enomem();

                goto kill;
        }

        *pte = pte_mkuptodate(*pte);

        return;

kill:
        printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
        force_sig(SIGKILL, current);
}

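/*
 * Out-of-line wrappers around the page table accessors, presumably for
 * callers that cannot use the inline versions directly, plus addr_pte()
 * to look up the PTE for an address in a task's page tables.
 */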
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
        return pgd_offset(mm, address);
}

pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
        return pud_offset(pgd, address);
}

pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
        return pmd_offset(pud, address);
}

pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
        return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(task->mm, addr);
        pud_t *pud = pud_offset(pgd, addr);
        pmd_t *pmd = pmd_offset(pud, addr);

        return pte_offset_map(pmd, addr);
}

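/*
 * The flush_tlb_* entry points below route kernel-range flushes to
 * flush_tlb_kernel_range_common() and user-range flushes to
 * fix_range_common() via fix_range().
 */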
void flush_tlb_all(void)
{
        flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
        flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
        flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
{
        fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
{
        if (vma->vm_mm == NULL)
                flush_tlb_kernel_range_common(start, end);
        else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

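/*
 * Range and whole-mm flushes for user address spaces.  These only walk the
 * page tables and update the host mappings; there is no hardware TLB to
 * invalidate.
 */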
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end)
{
        /*
         * Don't bother flushing if this address space is about to be
         * destroyed.
         */
        if (atomic_read(&mm->mm_users) == 0)
                return;

        fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 0);
                vma = vma->vm_next;
        }
}

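/*
 * Re-issue the host mappings for every VMA of the current address space,
 * with force set so that even entries the page tables consider up to date
 * are recreated on the host.
 */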
void force_flush_all(void)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = mm->mmap;

        while (vma != NULL) {
                fix_range(mm, vma->vm_start, vma->vm_end, 1);
                vma = vma->vm_next;
        }
}