linux/arch/x86/mm/kmemcheck/kmemcheck.c
/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"


#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
#  define KMEMCHECK_ENABLED 2
#endif

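/*
 * 0: disabled, 1: enabled, 2: one-shot mode (checking is switched off again
 * after the first error has been reported). The default is taken from the
 * Kconfig choices above and can be overridden with the kmemcheck= boot
 * parameter handled below.
 */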
int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
        /*
         * Limit SMP to use a single CPU. We rely on the fact that this code
         * runs before SMP is set up.
         */
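        /*
         * (Tracked pages are transiently made present while the faulting
         * instruction is single-stepped; that window is per-CPU and is not
         * protected against accesses from other CPUs, hence the single-CPU
         * limit.)
         */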
        if (setup_max_cpus > 1) {
                printk(KERN_INFO
                        "kmemcheck: Limiting number of CPUs to 1.\n");
                setup_max_cpus = 1;
        }
#endif

        if (!kmemcheck_selftest()) {
                printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
                kmemcheck_enabled = 0;
                return -EINVAL;
        }

        printk(KERN_INFO "kmemcheck: Initialized\n");
        return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
        if (!str)
                return -EINVAL;

        sscanf(str, "%d", &kmemcheck_enabled);
        return 0;
}

early_param("kmemcheck", param_kmemcheck);
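
/*
 * Example: booting with "kmemcheck=0" disables the checker, "kmemcheck=1"
 * enables it, and "kmemcheck=2" selects one-shot mode.
 */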
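/*
 * Pages tracked by kmemcheck are kept non-present in the kernel page tables
 * so that every access to them faults into kmemcheck_fault(). The two
 * helpers below temporarily set or clear _PAGE_PRESENT for a single tracked
 * page so that the faulting instruction can be allowed to complete.
 */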
int kmemcheck_show_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}

struct kmemcheck_context {
        bool busy;
        int balance;

        /*
         * There can be at most two memory operands to an instruction, but
         * each address can cross a page boundary -- so we may need up to
         * four addresses that must be hidden/revealed for each fault.
         */
        unsigned long addr[4];
        unsigned long n_addrs;
        unsigned long flags;

        /* Data size of the instruction that caused a fault. */
        unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

bool kmemcheck_active(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
        data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_show_addr(data->addr[i]);

        return n;
}

static unsigned int kmemcheck_hide_all(void)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_hide_addr(data->addr[i]);

        return n;
}

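/*
 * kmemcheck_show() and kmemcheck_hide() implement the core trick: the #PF
 * handler calls kmemcheck_show() to make the hidden page(s) present and to
 * arm single-stepping (TF set, IF cleared); the faulting instruction is then
 * re-executed, and the resulting #DB trap ends up in kmemcheck_hide(), which
 * re-hides the page(s) and restores the saved flags.
 */
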
/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        BUG_ON(!irqs_disabled());

        if (unlikely(data->balance != 0)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->balance = 0;
                return;
        }

        /*
         * None of the addresses actually belonged to kmemcheck. Note that
         * this is not an error.
         */
        if (kmemcheck_show_all() == 0)
                return;

        ++data->balance;

        /*
         * The IF needs to be cleared as well, so that the faulting
         * instruction can run "uninterrupted". Otherwise, we might take
         * an interrupt and start executing that before we've had a chance
         * to hide the page again.
         *
         * NOTE: In the rare case of multiple faults, we must not override
         * the original flags:
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                data->flags = regs->flags;

        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        int n;

        BUG_ON(!irqs_disabled());

        if (unlikely(data->balance != 1)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->n_addrs = 0;
                data->balance = 0;

                if (!(data->flags & X86_EFLAGS_TF))
                        regs->flags &= ~X86_EFLAGS_TF;
                if (data->flags & X86_EFLAGS_IF)
                        regs->flags |= X86_EFLAGS_IF;
                return;
        }

        if (kmemcheck_enabled)
                n = kmemcheck_hide_all();
        else
                n = kmemcheck_show_all();

        if (n == 0)
                return;

        --data->balance;

        data->n_addrs = 0;

        if (!(data->flags & X86_EFLAGS_TF))
                regs->flags &= ~X86_EFLAGS_TF;
        if (data->flags & X86_EFLAGS_IF)
                regs->flags |= X86_EFLAGS_IF;
}

void kmemcheck_show_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}

bool kmemcheck_page_is_tracked(struct page *p)
{
        /* This will also check the "hidden" flag of the PTE. */
        return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}

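/*
 * Every byte of a tracked allocation has a corresponding shadow byte that
 * records whether it has been initialized. kmemcheck_shadow_lookup() returns
 * NULL for addresses that kmemcheck does not track.
 */
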
/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;
        enum kmemcheck_shadow status;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, addr, size, regs);

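        /* One-shot mode: disable further checking after the first report. */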
        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;

        /* Don't warn about it again. */
        kmemcheck_shadow_set(shadow, size);
}

bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
        enum kmemcheck_shadow status;
        void *shadow;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return true;

        status = kmemcheck_shadow_test_all(shadow, size);

        return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_read_strict(regs, addr, size);
                return;
        }

        /*
         * What we do is basically to split the access across the
         * two pages and handle each part separately. Yes, this means
         * that we may now see reads that are 3 + 5 bytes, for
         * example (and if both are uninitialized, there will be two
         * reports), but it makes the code a lot simpler.
         */
        kmemcheck_read_strict(regs, addr, next_page - addr);
        kmemcheck_read_strict(regs, next_page, next_addr - next_page + 1);
}

static void kmemcheck_write_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_write_strict(regs, addr, size);
                return;
        }

        /* See comment in kmemcheck_read(). */
        kmemcheck_write_strict(regs, addr, next_page - addr);
        kmemcheck_write_strict(regs, next_page, next_addr - next_page + 1);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
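/*
 * Note that the source's shadow state is propagated to the destination when
 * the destination is tracked; only bytes whose uninitialized state would
 * otherwise be lost (copies into untracked memory) are tested and reported
 * at the end of this function.
 */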
static void kmemcheck_copy(struct pt_regs *regs,
        unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
        uint8_t shadow[8];
        enum kmemcheck_shadow status;

        unsigned long page;
        unsigned long next_addr;
        unsigned long next_page;

        uint8_t *x;
        unsigned int i;
        unsigned int n;

        BUG_ON(size > sizeof(shadow));

        page = src_addr & PAGE_MASK;
        next_addr = src_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < size; ++i)
                                shadow[i] = x[i];
                } else {
                        for (i = 0; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        } else {
                n = next_page - src_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < n; ++i)
                                shadow[i] = x[i];
                } else {
                        /* Not tracked */
                        for (i = 0; i < n; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i)
                                shadow[i] = x[i - n];
                } else {
                        /* Not tracked */
                        for (i = n; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        }

        page = dst_addr & PAGE_MASK;
        next_addr = dst_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < size; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        } else {
                n = next_page - dst_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < n; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i) {
                                x[i - n] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        }

        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, src_addr, size, regs);

        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;
}

enum kmemcheck_method {
        KMEMCHECK_READ,
        KMEMCHECK_WRITE,
};

static void kmemcheck_access(struct pt_regs *regs,
        unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
        const uint8_t *insn;
        const uint8_t *insn_primary;
        unsigned int size;

        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        /* Recursive fault -- ouch. */
        if (data->busy) {
                kmemcheck_show_addr(fallback_address);
                kmemcheck_error_save_bug(regs);
                return;
        }

        data->busy = true;

        insn = (const uint8_t *) regs->ip;
        insn_primary = kmemcheck_opcode_get_primary(insn);

        kmemcheck_opcode_decode(insn, &size);

        switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
                /* AND, OR, XOR */
                /*
                 * Unfortunately, these instructions have to be excluded from
                 * our regular checking since they access only some (and not
                 * all) bits. This clears out "bogus" bitfield-access warnings.
                 */
        case 0x80:
        case 0x81:
        case 0x82:
        case 0x83:
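                /*
                 * For opcodes 0x80..0x83 (immediate group 1), bits 5:3 of the
                 * ModRM byte (the byte following the primary opcode) select
                 * the actual operation: 0=ADD, 1=OR, 2=ADC, 3=SBB, 4=AND,
                 * 5=SUB, 6=XOR, 7=CMP.
                 */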
                switch ((insn_primary[1] >> 3) & 7) {
                        /* OR */
                case 1:
                        /* AND */
                case 4:
                        /* XOR */
                case 6:
                        kmemcheck_write(regs, fallback_address, size);
                        goto out;

                        /* ADD */
                case 0:
                        /* ADC */
                case 2:
                        /* SBB */
                case 3:
                        /* SUB */
                case 5:
                        /* CMP */
                case 7:
                        break;
                }
                break;
#endif

                /* MOVS, MOVSB, MOVSW, MOVSD */
        case 0xa4:
        case 0xa5:
                /*
                 * These instructions are special because they take two
                 * addresses, but we only get one page fault.
                 */
                kmemcheck_copy(regs, regs->si, regs->di, size);
                goto out;

                /* CMPS, CMPSB, CMPSW, CMPSD */
        case 0xa6:
        case 0xa7:
                kmemcheck_read(regs, regs->si, size);
                kmemcheck_read(regs, regs->di, size);
                goto out;
        }

        /*
         * If the opcode isn't special in any way, we use the data from the
         * page fault handler to determine the address and type of memory
         * access.
         */
        switch (fallback_method) {
        case KMEMCHECK_READ:
                kmemcheck_read(regs, fallback_address, size);
                goto out;
        case KMEMCHECK_WRITE:
                kmemcheck_write(regs, fallback_address, size);
                goto out;
        }

out:
        data->busy = false;
}

bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
        unsigned long error_code)
{
        pte_t *pte;

        /*
         * XXX: Is it safe to assume that memory accesses from virtual 86
         * mode or non-kernel code segments will _never_ access kernel
         * memory (e.g. tracked pages)? For now, we need this to avoid
         * invoking kmemcheck for PnP BIOS calls.
         */
        if (regs->flags & X86_VM_MASK)
                return false;
        if (regs->cs != __KERNEL_CS)
                return false;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return false;

        WARN_ON_ONCE(in_nmi());

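        /* Bit 1 of the #PF error code is set for write accesses. */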
        if (error_code & 2)
                kmemcheck_access(regs, address, KMEMCHECK_WRITE);
        else
                kmemcheck_access(regs, address, KMEMCHECK_READ);

        kmemcheck_show(regs);
        return true;
}

bool kmemcheck_trap(struct pt_regs *regs)
{
        if (!kmemcheck_active(regs))
                return false;

        /* We're done. */
        kmemcheck_hide(regs);
        return true;
}