/*
 *      fs/proc/kcore.c kernel ELF core dumper
 *
 *      Modelled on fs/exec.c:aout_core_dump()
 *      Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *      ELF version written by David Howells <David.Howells@nexor.co.uk>
 *      Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *      Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *      Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */
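
/*
 * /proc/kcore exposes the running kernel's memory as an ELF core image,
 * so it can be inspected with standard tools, e.g.:
 *      gdb vmlinux /proc/kcore
 */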

#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <asm/sections.h>
#include "internal.h"

#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

static struct proc_dir_entry *proc_root_kcore;

#ifndef kc_vaddr_to_offset
#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef kc_offset_to_vaddr
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};
static LIST_HEAD(kclist_head);
static DEFINE_RWLOCK(kclist_lock);
static int kcore_need_update = 1;

void
kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
{
        new->addr = (unsigned long)addr;
        new->size = size;
        new->type = type;

        write_lock(&kclist_lock);
        list_add_tail(&new->list, &kclist_head);
        write_unlock(&kclist_lock);
}

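/*
 * Compute the total size of /proc/kcore: the largest file offset covered
 * by any kclist entry, plus a page-aligned header area (*elf_buflen) large
 * enough for the ELF header, the program headers and the three notes
 * written by elf_kcore_store_hdr().
 */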
static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
{
        size_t try, size;
        struct kcore_list *m;

        *nphdr = 1; /* PT_NOTE */
        size = 0;

        list_for_each_entry(m, &kclist_head, list) {
                try = kc_vaddr_to_offset((size_t)m->addr + m->size);
                if (try > size)
                        size = try;
                *nphdr = *nphdr + 1;
        }
        *elf_buflen =   sizeof(struct elfhdr) +
                        (*nphdr + 2)*sizeof(struct elf_phdr) +
                        3 * ((sizeof(struct elf_note)) +
                             roundup(sizeof(CORE_STR), 4)) +
                        roundup(sizeof(struct elf_prstatus), 4) +
                        roundup(sizeof(struct elf_prpsinfo), 4) +
                        roundup(arch_task_struct_size, 4);
        *elf_buflen = PAGE_ALIGN(*elf_buflen);
        return size + *elf_buflen;
}

static void free_kclist_ents(struct list_head *head)
{
        struct kcore_list *tmp, *pos;

        list_for_each_entry_safe(pos, tmp, head, list) {
                list_del(&pos->list);
                kfree(pos);
        }
}

/*
 * Replace all KCORE_RAM/KCORE_VMEMMAP information with the passed list.
 */
static void __kcore_update_ram(struct list_head *list)
{
        int nphdr;
        size_t size;
        struct kcore_list *tmp, *pos;
        LIST_HEAD(garbage);

        write_lock(&kclist_lock);
        if (kcore_need_update) {
                list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
                        if (pos->type == KCORE_RAM
                                || pos->type == KCORE_VMEMMAP)
                                list_move(&pos->list, &garbage);
                }
                list_splice_tail(list, &kclist_head);
        } else
                list_splice(list, &garbage);
        kcore_need_update = 0;
        proc_root_kcore->size = get_kcore_size(&nphdr, &size);
        write_unlock(&kclist_lock);

        free_kclist_ents(&garbage);
}

#ifdef CONFIG_HIGHMEM
/*
 * With highmem, part of memory is _invisible_ to the kernel, so only the
 * direct-mapped range [0, max_low_pfn) is dumped and treated as one
 * contiguous region; any memory holes within it are small enough to
 * tolerate.
 */
static int kcore_update_ram(void)
{
        LIST_HEAD(head);
        struct kcore_list *ent;
        int ret = 0;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va(0);
        ent->size = max_low_pfn << PAGE_SHIFT;
        ent->type = KCORE_RAM;
        list_add(&ent->list, &head);
        __kcore_update_ram(&head);
        return ret;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* calculate the vmemmap address range for the given System RAM pfns and register it */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
        unsigned long nr_pages = ent->size >> PAGE_SHIFT;
        unsigned long start, end;
        struct kcore_list *vmm, *tmp;

        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
        end = PAGE_ALIGN(end);
        /* overlap check (because we have to align to page boundaries) */
        list_for_each_entry(tmp, head, list) {
                if (tmp->type != KCORE_VMEMMAP)
                        continue;
                if (start < tmp->addr + tmp->size)
                        if (end > tmp->addr)
                                end = tmp->addr;
        }
        if (start < end) {
                vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
                if (!vmm)
                        return 0;
                vmm->addr = start;
                vmm->size = end - start;
                vmm->type = KCORE_VMEMMAP;
                list_add_tail(&vmm->list, head);
        }
        return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        return 1;
}
#endif

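/*
 * walk_system_ram_range() callback: wrap one System RAM range in a
 * KCORE_RAM entry (trimmed to the direct-mapped window) and, with
 * SPARSEMEM_VMEMMAP, also register the vmemmap range backing it.
 */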
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
        struct list_head *head = (struct list_head *)arg;
        struct kcore_list *ent;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
        ent->size = nr_pages << PAGE_SHIFT;

        /* Sanity check: this can happen on 32-bit arches */
        if (ent->addr < (unsigned long) __va(0))
                goto free_out;

        /* Trim the area that is not mapped; adapted from the ppc32 code. */
        if (ULONG_MAX - ent->addr < ent->size)
                ent->size = ULONG_MAX - ent->addr;

        /* Trim when the vmalloc() area lies above the direct-map area. */
        if (VMALLOC_START > (unsigned long)__va(0)) {
                if (ent->addr > VMALLOC_START)
                        goto free_out;
                if (VMALLOC_START - ent->addr < ent->size)
                        ent->size = VMALLOC_START - ent->addr;
        }

        ent->type = KCORE_RAM;
        list_add_tail(&ent->list, head);

        if (!get_sparsemem_vmemmap_info(ent, head)) {
                list_del(&ent->list);
                goto free_out;
        }

        return 0;
free_out:
        kfree(ent);
        return 1;
}

static int kcore_update_ram(void)
{
        int nid, ret;
        unsigned long end_pfn;
        LIST_HEAD(head);

        /* Not initialized yet: find out the max pfn and update now */
        end_pfn = 0;
        for_each_node_state(nid, N_MEMORY) {
                unsigned long node_end;
                node_end = node_end_pfn(nid);
                if (end_pfn < node_end)
                        end_pfn = node_end;
        }
        /* scan 0 to max_pfn */
        ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
        if (ret) {
                free_kclist_ents(&head);
                return -ENOMEM;
        }
        __kcore_update_ram(&head);
        return ret;
}
#endif /* CONFIG_HIGHMEM */

/*****************************************************************************/
/*
 * determine size of ELF note
 */
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup((strlen(en->name) + 1), 4);
        sz += roundup(en->datasz, 4);

        return sz;
} /* end notesize() */

/*****************************************************************************/
/*
 * store a note in the header buffer: the elf_note header, then the
 * NUL-terminated name, then the data, with name and data each padded
 * to a 4-byte boundary
 */
static char *storenote(struct memelfnote *men, char *bufp)
{
        struct elf_note en;

#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)

        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en));
        DUMP_WRITE(men->name, en.n_namesz);

        /* XXX - cast from long long to long to avoid need for libgcc.a */
        bufp = (char*) roundup((unsigned long)bufp,4);
        DUMP_WRITE(men->data, men->datasz);
        bufp = (char*) roundup((unsigned long)bufp,4);

#undef DUMP_WRITE

        return bufp;
} /* end storenote() */

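/*
 * Layout written below: the ELF header, a PT_NOTE program header, one
 * PT_LOAD program header per kclist entry, then the three notes
 * (NT_PRSTATUS, NT_PRPSINFO, NT_TASKSTRUCT).  The memory contents
 * themselves start at dataoff, i.e. past the elf_buflen header area.
 */
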
/*
 * store an ELF coredump header in the supplied buffer
 * nphdr is the number of elf_phdr to insert
 */
static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
{
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
        struct elf_phdr *nhdr, *phdr;
        struct elfhdr *elf;
        struct memelfnote notes[3];
        off_t offset = 0;
        struct kcore_list *m;

        /* setup ELF header */
        elf = (struct elfhdr *) bufp;
        bufp += sizeof(struct elfhdr);
        offset += sizeof(struct elfhdr);
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS]  = ELF_CLASS;
        elf->e_ident[EI_DATA]   = ELF_DATA;
        elf->e_ident[EI_VERSION]= EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;
        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
        elf->e_type     = ET_CORE;
        elf->e_machine  = ELF_ARCH;
        elf->e_version  = EV_CURRENT;
        elf->e_entry    = 0;
        elf->e_phoff    = sizeof(struct elfhdr);
        elf->e_shoff    = 0;
        elf->e_flags    = ELF_CORE_EFLAGS;
        elf->e_ehsize   = sizeof(struct elfhdr);
        elf->e_phentsize= sizeof(struct elf_phdr);
        elf->e_phnum    = nphdr;
        elf->e_shentsize= 0;
        elf->e_shnum    = 0;
        elf->e_shstrndx = 0;

        /* setup ELF PT_NOTE program header */
        nhdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        nhdr->p_type    = PT_NOTE;
        nhdr->p_offset  = 0;
        nhdr->p_vaddr   = 0;
        nhdr->p_paddr   = 0;
        nhdr->p_filesz  = 0;
        nhdr->p_memsz   = 0;
        nhdr->p_flags   = 0;
        nhdr->p_align   = 0;

        /* setup ELF PT_LOAD program header for every area */
        list_for_each_entry(m, &kclist_head, list) {
                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);

                phdr->p_type    = PT_LOAD;
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
                phdr->p_vaddr   = (size_t)m->addr;
                phdr->p_paddr   = 0;
                phdr->p_filesz  = phdr->p_memsz = m->size;
                phdr->p_align   = PAGE_SIZE;
        }

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */
        nhdr->p_offset  = offset;

        /* set up the process status */
        notes[0].name = CORE_STR;
        notes[0].type = NT_PRSTATUS;
        notes[0].datasz = sizeof(struct elf_prstatus);
        notes[0].data = &prstatus;

        memset(&prstatus, 0, sizeof(struct elf_prstatus));

        nhdr->p_filesz  = notesize(&notes[0]);
        bufp = storenote(&notes[0], bufp);

        /* set up the process info */
        notes[1].name   = CORE_STR;
        notes[1].type   = NT_PRPSINFO;
        notes[1].datasz = sizeof(struct elf_prpsinfo);
        notes[1].data   = &prpsinfo;

        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
        prpsinfo.pr_state       = 0;
        prpsinfo.pr_sname       = 'R';
        prpsinfo.pr_zomb        = 0;

        strcpy(prpsinfo.pr_fname, "vmlinux");
        strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));

        nhdr->p_filesz  += notesize(&notes[1]);
        bufp = storenote(&notes[1], bufp);

        /* set up the task structure */
        notes[2].name   = CORE_STR;
        notes[2].type   = NT_TASKSTRUCT;
        notes[2].datasz = arch_task_struct_size;
        notes[2].data   = current;

        nhdr->p_filesz  += notesize(&notes[2]);
        bufp = storenote(&notes[2], bufp);

} /* end elf_kcore_store_hdr() */

/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory: offsets below the
 * header area are served from a freshly built ELF header buffer, and
 * everything beyond it maps directly onto kernel virtual addresses
 */
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
        ssize_t acc = 0;
        size_t size, tsz;
        size_t elf_buflen;
        int nphdr;
        unsigned long start;

        read_lock(&kclist_lock);
        size = get_kcore_size(&nphdr, &elf_buflen);

        if (buflen == 0 || *fpos >= size) {
                read_unlock(&kclist_lock);
                return 0;
        }

        /* trim buflen to not go beyond EOF */
        if (buflen > size - *fpos)
                buflen = size - *fpos;

        /* construct an ELF core header if we'll need some of it */
        if (*fpos < elf_buflen) {
                char * elf_buf;

                tsz = elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;
                elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
                if (!elf_buf) {
                        read_unlock(&kclist_lock);
                        return -ENOMEM;
                }
                elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
                read_unlock(&kclist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
                        return -EFAULT;
                }
                kfree(elf_buf);
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* return now if the buffer is already full */
                if (buflen == 0)
                        return acc;
        } else
                read_unlock(&kclist_lock);

        /*
         * Check to see if our file offset matches with any of
         * the addresses in the elf_phdr on our list.
         */
        start = kc_offset_to_vaddr(*fpos - elf_buflen);
        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
                tsz = buflen;

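        /*
         * Copy out a page at a time: addresses covered by no kclist entry
         * are zero-filled, vmalloc/module addresses go through a bounce
         * buffer filled by vread(), and direct-mapped addresses are copied
         * straight out when kern_addr_valid() reports them mapped.
         */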
        while (buflen) {
                struct kcore_list *m;

                read_lock(&kclist_lock);
                list_for_each_entry(m, &kclist_head, list) {
                        if (start >= m->addr && start < (m->addr+m->size))
                                break;
                }
                read_unlock(&kclist_lock);

                if (&m->list == &kclist_head) {
                        if (clear_user(buffer, tsz))
                                return -EFAULT;
                } else if (is_vmalloc_or_module_addr((void *)start)) {
                        char * elf_buf;

                        elf_buf = kzalloc(tsz, GFP_KERNEL);
                        if (!elf_buf)
                                return -ENOMEM;
                        vread(elf_buf, (char *)start, tsz);
                        /* the buffer was zeroed, so any range vread() cannot read is returned as zeroes */
                        if (copy_to_user(buffer, elf_buf, tsz)) {
                                kfree(elf_buf);
                                return -EFAULT;
                        }
                        kfree(elf_buf);
                } else {
                        if (kern_addr_valid(start)) {
                                unsigned long n;

                                n = copy_to_user(buffer, (char *)start, tsz);
                                /*
                                 * We cannot distinguish between a fault on
                                 * the source and a fault on the destination.
                                 * When this happens we clear the unwritten
                                 * tail as well and hope a retry will trigger
                                 * the -EFAULT again.
                                 */
                                if (n) {
                                        if (clear_user(buffer + tsz - n, n))
                                                return -EFAULT;
                                }
                        } else {
                                if (clear_user(buffer, tsz))
                                        return -EFAULT;
                        }
                }
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;
                start += tsz;
                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
        }

        return acc;
}

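/*
 * Reading /proc/kcore exposes all of kernel memory, so opening is
 * restricted to CAP_SYS_RAWIO; open also refreshes the RAM list and the
 * inode size if memory has been hot-added or removed since the last open.
 */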
static int open_kcore(struct inode *inode, struct file *filp)
{
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
                mutex_lock(&inode->i_mutex);
                i_size_write(inode, proc_root_kcore->size);
                mutex_unlock(&inode->i_mutex);
        }
        return 0;
}

static const struct file_operations proc_kcore_operations = {
        .read           = read_kcore,
        .open           = open_kcore,
        .llseek         = default_llseek,
};

/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
                                    unsigned long action, void *arg)
{
        switch (action) {
        case MEM_ONLINE:
        case MEM_OFFLINE:
                write_lock(&kclist_lock);
                kcore_need_update = 1;
                write_unlock(&kclist_lock);
        }
        return NOTIFY_OK;
}

static struct notifier_block kcore_callback_nb __meminitdata = {
        .notifier_call = kcore_callback,
        .priority = 0,
};

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If defined, a special segment is used to map the kernel text instead
 * of the direct-map area, so a dedicated KCORE_TEXT entry must be created.
 */
static void __init proc_kcore_text_init(void)
{
        kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * When the modules area [MODULES_VADDR, MODULES_END) is separate from
 * the vmalloc area, it needs its own kclist entry.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
        if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
                kclist_add(&kcore_modules, (void *)MODULES_VADDR,
                        MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
        }
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
        proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
                                      &proc_kcore_operations);
        if (!proc_root_kcore) {
                pr_err("couldn't create /proc/kcore\n");
                return 0; /* Always returns 0. */
        }
        /* Store text area if it's special */
        proc_kcore_text_init();
        /* Store vmalloc area */
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
        add_modules_range();
        /* Store direct-map area from physical memory map */
        kcore_update_ram();
        register_hotmemory_notifier(&kcore_callback_nb);

        return 0;
}
fs_initcall(proc_kcore_init);