/*
 *      fs/proc/kcore.c kernel ELF core dumper
 *
 *      Modelled on fs/exec.c:aout_core_dump()
 *      Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *      ELF version written by David Howells <David.Howells@nexor.co.uk>
 *      Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *      Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *      Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */
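
/*
 * /proc/kcore presents the live kernel's memory as an ELF core file:
 * an ELF header plus notes, followed by one PT_LOAD segment for each
 * region registered on the kcore list.  A debugger can use it
 * directly, e.g.:
 *
 *      gdb vmlinux /proc/kcore
 */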

#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <asm/sections.h>
#include "internal.h"

#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

static struct proc_dir_entry *proc_root_kcore;

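/*
 * Map a kernel virtual address to its offset within the dump and back.
 * The defaults assume the linear direct map starting at PAGE_OFFSET;
 * architectures with a different layout override these macros.
 */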
#ifndef kc_vaddr_to_offset
#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef kc_offset_to_vaddr
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

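/*
 * kclist_head holds every region exported through /proc/kcore and is
 * protected by kclist_lock.  kcore_need_update flags that the RAM
 * entries are stale; it is set at boot and again on memory hotplug.
 */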
static LIST_HEAD(kclist_head);
static DEFINE_RWLOCK(kclist_lock);
static int kcore_need_update = 1;

void
kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
{
        new->addr = (unsigned long)addr;
        new->size = size;
        new->type = type;

        write_lock(&kclist_lock);
        list_add_tail(&new->list, &kclist_head);
        write_unlock(&kclist_lock);
}

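/*
 * Size the virtual file: the highest mapped offset on the list, plus
 * the page-aligned room needed for the ELF header, program headers
 * and the three notes (returned via *elf_buflen).
 */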
static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
{
        size_t try, size;
        struct kcore_list *m;

        *nphdr = 1; /* PT_NOTE */
        size = 0;

        list_for_each_entry(m, &kclist_head, list) {
                try = kc_vaddr_to_offset((size_t)m->addr + m->size);
                if (try > size)
                        size = try;
                *nphdr = *nphdr + 1;
        }
        *elf_buflen =   sizeof(struct elfhdr) +
                        (*nphdr + 2)*sizeof(struct elf_phdr) +
                        3 * ((sizeof(struct elf_note)) +
                             roundup(sizeof(CORE_STR), 4)) +
                        roundup(sizeof(struct elf_prstatus), 4) +
                        roundup(sizeof(struct elf_prpsinfo), 4) +
                        roundup(sizeof(struct task_struct), 4);
        *elf_buflen = PAGE_ALIGN(*elf_buflen);
        return size + *elf_buflen;
}

static void free_kclist_ents(struct list_head *head)
{
        struct kcore_list *tmp, *pos;

        list_for_each_entry_safe(pos, tmp, head, list) {
                list_del(&pos->list);
                kfree(pos);
        }
}

/*
 * Replace all KCORE_RAM/KCORE_VMEMMAP entries with the passed list.
 */
static void __kcore_update_ram(struct list_head *list)
{
        int nphdr;
        size_t size;
        struct kcore_list *tmp, *pos;
        LIST_HEAD(garbage);

        write_lock(&kclist_lock);
        if (kcore_need_update) {
                list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
                        if (pos->type == KCORE_RAM
                                || pos->type == KCORE_VMEMMAP)
                                list_move(&pos->list, &garbage);
                }
                list_splice_tail(list, &kclist_head);
        } else
                list_splice(list, &garbage);
        kcore_need_update = 0;
        proc_root_kcore->size = get_kcore_size(&nphdr, &size);
        write_unlock(&kclist_lock);

        free_kclist_ents(&garbage);
}

#ifdef CONFIG_HIGHMEM
/*
 * With highmem, just treat [0...max_low_pfn) as one contiguous range:
 * holes in low memory are comparatively small, and highmem itself is
 * not permanently mapped, so it cannot be dumped via the direct map
 * anyway.
 */
static int kcore_update_ram(void)
{
        LIST_HEAD(head);
        struct kcore_list *ent;
        int ret = 0;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va(0);
        ent->size = max_low_pfn << PAGE_SHIFT;
        ent->type = KCORE_RAM;
        list_add(&ent->list, &head);
        __kcore_update_ram(&head);
        return ret;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* calculate the vmemmap address for a given System RAM pfn range and register it */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
        unsigned long nr_pages = ent->size >> PAGE_SHIFT;
        unsigned long start, end;
        struct kcore_list *vmm, *tmp;

        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
        end = ALIGN(end, PAGE_SIZE);
        /* overlap check (needed because the range is aligned to whole pages) */
        list_for_each_entry(tmp, head, list) {
                if (tmp->type != KCORE_VMEMMAP)
                        continue;
                if (start < tmp->addr + tmp->size)
                        if (end > tmp->addr)
                                end = tmp->addr;
        }
        if (start < end) {
                vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
                if (!vmm)
                        return 0;
                vmm->addr = start;
                vmm->size = end - start;
                vmm->type = KCORE_VMEMMAP;
                list_add_tail(&vmm->list, head);
        }
        return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        return 1;
}
#endif

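/*
 * Callback for walk_system_ram_range(): describe one System RAM range
 * as a KCORE_RAM entry (plus its vmemmap range, if any).  A non-zero
 * return stops the walk and makes the caller discard the partial list.
 */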
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
        struct list_head *head = (struct list_head *)arg;
        struct kcore_list *ent;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
        ent->size = nr_pages << PAGE_SHIFT;

        /* Sanity check: the address can wrap below the direct map on 32-bit arches */
        if (ent->addr < (unsigned long) __va(0))
                goto free_out;

        /* Cut off the area that is not mapped; taken from the ppc32 code. */
        if (ULONG_MAX - ent->addr < ent->size)
                ent->size = ULONG_MAX - ent->addr;

        /* Cut the entry where the vmalloc() area starts above the direct map */
        if (VMALLOC_START > (unsigned long)__va(0)) {
                if (ent->addr > VMALLOC_START)
                        goto free_out;
                if (VMALLOC_START - ent->addr < ent->size)
                        ent->size = VMALLOC_START - ent->addr;
        }

        ent->type = KCORE_RAM;
        list_add_tail(&ent->list, head);

        if (!get_sparsemem_vmemmap_info(ent, head)) {
                list_del(&ent->list);
                goto free_out;
        }

        return 0;
free_out:
        kfree(ent);
        return 1;
}

static int kcore_update_ram(void)
{
        int nid, ret;
        unsigned long end_pfn;
        LIST_HEAD(head);

        /* Not initialized yet: find the highest pfn across all memory nodes */
        end_pfn = 0;
        for_each_node_state(nid, N_MEMORY) {
                unsigned long node_end;
                node_end = NODE_DATA(nid)->node_start_pfn +
                        NODE_DATA(nid)->node_spanned_pages;
                if (end_pfn < node_end)
                        end_pfn = node_end;
        }
        /* scan 0 to max_pfn */
        ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
        if (ret) {
                free_kclist_ents(&head);
                return -ENOMEM;
        }
        __kcore_update_ram(&head);
        return ret;
}
#endif /* CONFIG_HIGHMEM */
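
/*
 * Everything below builds the ELF image itself: one ELF header, a
 * PT_NOTE segment carrying three notes (prstatus, prpsinfo and the
 * task_struct), and one PT_LOAD program header per kcore_list entry.
 */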

/*****************************************************************************/
/*
 * determine size of an ELF note
 */
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup((strlen(en->name) + 1), 4);
        sz += roundup(en->datasz, 4);

        return sz;
} /* end notesize() */

/*****************************************************************************/
/*
 * store a note in the header buffer
 */
static char *storenote(struct memelfnote *men, char *bufp)
{
        struct elf_note en;

#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)

        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en));
        DUMP_WRITE(men->name, en.n_namesz);

        /* XXX - cast from long long to long to avoid need for libgcc.a */
        bufp = (char*) roundup((unsigned long)bufp,4);
        DUMP_WRITE(men->data, men->datasz);
        bufp = (char*) roundup((unsigned long)bufp,4);

#undef DUMP_WRITE

        return bufp;
} /* end storenote() */
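
/*
 * Each stored note ends up 4-byte aligned in the layout
 *   struct elf_note | name ("CORE\0", padded) | descriptor (padded)
 */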

/*
 * store an ELF coredump header in the supplied buffer
 * nphdr is the number of elf_phdr to insert
 */
static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
{
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
        struct elf_phdr *nhdr, *phdr;
        struct elfhdr *elf;
        struct memelfnote notes[3];
        off_t offset = 0;
        struct kcore_list *m;

        /* setup ELF header */
        elf = (struct elfhdr *) bufp;
        bufp += sizeof(struct elfhdr);
        offset += sizeof(struct elfhdr);
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS]  = ELF_CLASS;
        elf->e_ident[EI_DATA]   = ELF_DATA;
        elf->e_ident[EI_VERSION]= EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;
        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
        elf->e_type     = ET_CORE;
        elf->e_machine  = ELF_ARCH;
        elf->e_version  = EV_CURRENT;
        elf->e_entry    = 0;
        elf->e_phoff    = sizeof(struct elfhdr);
        elf->e_shoff    = 0;
        elf->e_flags    = ELF_CORE_EFLAGS;
        elf->e_ehsize   = sizeof(struct elfhdr);
        elf->e_phentsize= sizeof(struct elf_phdr);
        elf->e_phnum    = nphdr;
        elf->e_shentsize= 0;
        elf->e_shnum    = 0;
        elf->e_shstrndx = 0;

        /* setup ELF PT_NOTE program header */
        nhdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        nhdr->p_type    = PT_NOTE;
        nhdr->p_offset  = 0;
        nhdr->p_vaddr   = 0;
        nhdr->p_paddr   = 0;
        nhdr->p_filesz  = 0;
        nhdr->p_memsz   = 0;
        nhdr->p_flags   = 0;
        nhdr->p_align   = 0;

        /* setup ELF PT_LOAD program header for every area */
        list_for_each_entry(m, &kclist_head, list) {
                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);

                phdr->p_type    = PT_LOAD;
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
                phdr->p_vaddr   = (size_t)m->addr;
                phdr->p_paddr   = 0;
                phdr->p_filesz  = phdr->p_memsz = m->size;
                phdr->p_align   = PAGE_SIZE;
        }

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */
        nhdr->p_offset  = offset;

        /* set up the process status */
        notes[0].name = CORE_STR;
        notes[0].type = NT_PRSTATUS;
        notes[0].datasz = sizeof(struct elf_prstatus);
        notes[0].data = &prstatus;

        memset(&prstatus, 0, sizeof(struct elf_prstatus));

        nhdr->p_filesz  = notesize(&notes[0]);
        bufp = storenote(&notes[0], bufp);

        /* set up the process info */
        notes[1].name   = CORE_STR;
        notes[1].type   = NT_PRPSINFO;
        notes[1].datasz = sizeof(struct elf_prpsinfo);
        notes[1].data   = &prpsinfo;

        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
        prpsinfo.pr_state       = 0;
        prpsinfo.pr_sname       = 'R';
        prpsinfo.pr_zomb        = 0;

        strcpy(prpsinfo.pr_fname, "vmlinux");
        strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));

        nhdr->p_filesz  += notesize(&notes[1]);
        bufp = storenote(&notes[1], bufp);

        /* set up the task structure */
        notes[2].name   = CORE_STR;
        notes[2].type   = NT_TASKSTRUCT;
        notes[2].datasz = sizeof(struct task_struct);
        notes[2].data   = current;

        nhdr->p_filesz  += notesize(&notes[2]);
        bufp = storenote(&notes[2], bufp);

} /* end elf_kcore_store_hdr() */
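
/*
 * The headers and notes built above fit in the first dataoff bytes of
 * the file (dataoff is the page-aligned elf_buflen); each PT_LOAD's
 * p_offset points past them via kc_vaddr_to_offset() + dataoff.
 */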

/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory
 */
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
        ssize_t acc = 0;
        size_t size, tsz;
        size_t elf_buflen;
        int nphdr;
        unsigned long start;

        read_lock(&kclist_lock);
        size = get_kcore_size(&nphdr, &elf_buflen);

        if (buflen == 0 || *fpos >= size) {
                read_unlock(&kclist_lock);
                return 0;
        }

        /* trim buflen to not go beyond EOF */
        if (buflen > size - *fpos)
                buflen = size - *fpos;

        /* construct an ELF core header if we'll need some of it */
        if (*fpos < elf_buflen) {
                char * elf_buf;

                tsz = elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;
                elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
                if (!elf_buf) {
                        read_unlock(&kclist_lock);
                        return -ENOMEM;
                }
                elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
                read_unlock(&kclist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
                        return -EFAULT;
                }
                kfree(elf_buf);
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* leave now if the buffer is already full */
                if (buflen == 0)
                        return acc;
        } else
                read_unlock(&kclist_lock);

        /*
         * Check to see if our file offset matches with any of
         * the addresses in the elf_phdr on our list.
         */
        start = kc_offset_to_vaddr(*fpos - elf_buflen);
        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
                tsz = buflen;

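        /*
         * Copy the remainder page by page.  Three cases per chunk: an
         * address covered by no list entry reads back as zeroes;
         * vmalloc and module space goes through a bounce buffer via
         * vread(), since it may be only sparsely mapped; the direct
         * map is copied straight to userspace when kern_addr_valid()
         * says the page is present.
         */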
        while (buflen) {
                struct kcore_list *m;

                read_lock(&kclist_lock);
                list_for_each_entry(m, &kclist_head, list) {
                        if (start >= m->addr && start < (m->addr+m->size))
                                break;
                }
                read_unlock(&kclist_lock);

                if (&m->list == &kclist_head) {
                        if (clear_user(buffer, tsz))
                                return -EFAULT;
                } else if (is_vmalloc_or_module_addr((void *)start)) {
                        char * elf_buf;

                        elf_buf = kzalloc(tsz, GFP_KERNEL);
                        if (!elf_buf)
                                return -ENOMEM;
                        vread(elf_buf, (char *)start, tsz);
                        /* we have to zero-fill the user buffer even if nothing was read */
                        if (copy_to_user(buffer, elf_buf, tsz)) {
                                kfree(elf_buf);
                                return -EFAULT;
                        }
                        kfree(elf_buf);
                } else {
                        if (kern_addr_valid(start)) {
                                unsigned long n;

                                n = copy_to_user(buffer, (char *)start, tsz);
                                /*
                                 * We cannot distinguish between fault on source
                                 * and fault on destination. When this happens
                                 * we clear too and hope it will trigger the
                                 * EFAULT again.
                                 */
                                if (n) {
                                        if (clear_user(buffer + tsz - n, n))
                                                return -EFAULT;
                                }
                        } else {
                                if (clear_user(buffer, tsz))
                                        return -EFAULT;
                        }
                }
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;
                start += tsz;
                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
        }

        return acc;
}
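
/*
 * Opening /proc/kcore requires CAP_SYS_RAWIO.  The RAM list is
 * refreshed lazily here, and the inode size is synced to the current
 * /proc entry size.
 */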
static int open_kcore(struct inode *inode, struct file *filp)
{
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
                mutex_lock(&inode->i_mutex);
                i_size_write(inode, proc_root_kcore->size);
                mutex_unlock(&inode->i_mutex);
        }
        return 0;
}

static const struct file_operations proc_kcore_operations = {
        .read           = read_kcore,
        .open           = open_kcore,
        .llseek         = default_llseek,
};

/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
                                    unsigned long action, void *arg)
{
        switch (action) {
        case MEM_ONLINE:
        case MEM_OFFLINE:
                write_lock(&kclist_lock);
                kcore_need_update = 1;
                write_unlock(&kclist_lock);
        }
        return NOTIFY_OK;
}

static struct notifier_block kcore_callback_nb __meminitdata = {
        .notifier_call = kcore_callback,
        .priority = 0,
};

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If defined, a special segment is used to map kernel text rather than
 * the direct-map area, so a dedicated TEXT entry must be created.
 */
static void __init proc_kcore_text_init(void)
{
        kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * The module area [MODULES_VADDR, MODULES_END) does not overlap the
 * vmalloc area, so it needs its own entry.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
        kclist_add(&kcore_modules, (void *)MODULES_VADDR,
                        MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
        proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
                                      &proc_kcore_operations);
        if (!proc_root_kcore) {
                pr_err("couldn't create /proc/kcore\n");
                return 0; /* Always returns 0. */
        }
        /* Store the text area if it's special */
        proc_kcore_text_init();
        /* Store the vmalloc area */
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
        add_modules_range();
        /* Store the direct-map area from the physical memory map */
        kcore_update_ram();
        register_hotmemory_notifier(&kcore_callback_nb);

        return 0;
}
module_init(proc_kcore_init);