/*
 *      fs/proc/kcore.c kernel ELF core dumper
 *
 *      Modelled on fs/exec.c:aout_core_dump()
 *      Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *      ELF version written by David Howells <David.Howells@nexor.co.uk>
 *      Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *      Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *      Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/notifier.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/printk.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <asm/sections.h>
#include "internal.h"

#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

static struct proc_dir_entry *proc_root_kcore;

#ifndef kc_vaddr_to_offset
#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef kc_offset_to_vaddr
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static LIST_HEAD(kclist_head);
static DEFINE_RWLOCK(kclist_lock);
static int kcore_need_update = 1;

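/*
 * Register a memory range to be exposed through /proc/kcore. The caller
 * owns @new; entries added here stay on the list for the lifetime of the
 * system (only KCORE_RAM/KCORE_VMEMMAP entries are ever replaced, by
 * __kcore_update_ram()).
 */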
void
kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
{
        new->addr = (unsigned long)addr;
        new->size = size;
        new->type = type;

        write_lock(&kclist_lock);
        list_add_tail(&new->list, &kclist_head);
        write_unlock(&kclist_lock);
}

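/*
 * Compute the apparent size of /proc/kcore: the highest file offset covered
 * by any registered range, plus the page-aligned buffer reserved for the
 * synthetic ELF header. Also reports the number of program headers needed
 * (one PT_LOAD per range plus one PT_NOTE). Caller must hold kclist_lock.
 */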
static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
{
        size_t try, size;
        struct kcore_list *m;

        *nphdr = 1; /* PT_NOTE */
        size = 0;

        list_for_each_entry(m, &kclist_head, list) {
                try = kc_vaddr_to_offset((size_t)m->addr + m->size);
                if (try > size)
                        size = try;
                *nphdr = *nphdr + 1;
        }
        *elf_buflen =   sizeof(struct elfhdr) +
                        (*nphdr + 2)*sizeof(struct elf_phdr) +
                        3 * ((sizeof(struct elf_note)) +
                             roundup(sizeof(CORE_STR), 4)) +
                        roundup(sizeof(struct elf_prstatus), 4) +
                        roundup(sizeof(struct elf_prpsinfo), 4) +
                        roundup(sizeof(struct task_struct), 4);
        *elf_buflen = PAGE_ALIGN(*elf_buflen);
        return size + *elf_buflen;
}

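/* Free a temporary list of kcore_list entries (replaced or unwanted ones). */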
static void free_kclist_ents(struct list_head *head)
{
        struct kcore_list *tmp, *pos;

        list_for_each_entry_safe(pos, tmp, head, list) {
                list_del(&pos->list);
                kfree(pos);
        }
}

/*
 * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
 */
static void __kcore_update_ram(struct list_head *list)
{
        int nphdr;
        size_t size;
        struct kcore_list *tmp, *pos;
        LIST_HEAD(garbage);

        write_lock(&kclist_lock);
        if (kcore_need_update) {
                list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
                        if (pos->type == KCORE_RAM
                                || pos->type == KCORE_VMEMMAP)
                                list_move(&pos->list, &garbage);
                }
                list_splice_tail(list, &kclist_head);
        } else
                list_splice(list, &garbage);
        kcore_need_update = 0;
        proc_root_kcore->size = get_kcore_size(&nphdr, &size);
        write_unlock(&kclist_lock);

        free_kclist_ents(&garbage);
}

#ifdef CONFIG_HIGHMEM
/*
 * With CONFIG_HIGHMEM, part of memory is not permanently mapped into the
 * kernel, so only the direct-mapped low memory is dumped, as a single
 * contiguous [0...max_low_pfn) range; any memory holes in that range are
 * small enough to tolerate.
 */
static int kcore_update_ram(void)
{
        LIST_HEAD(head);
        struct kcore_list *ent;
        int ret = 0;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va(0);
        ent->size = max_low_pfn << PAGE_SHIFT;
        ent->type = KCORE_RAM;
        list_add(&ent->list, &head);
        __kcore_update_ram(&head);
        return ret;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* calculate the vmemmap address range for a given System RAM range and register it */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
        unsigned long nr_pages = ent->size >> PAGE_SHIFT;
        unsigned long start, end;
        struct kcore_list *vmm, *tmp;

        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
        end = ALIGN(end, PAGE_SIZE);
        /* overlap check (because we have to align pages) */
        list_for_each_entry(tmp, head, list) {
                if (tmp->type != KCORE_VMEMMAP)
                        continue;
                if (start < tmp->addr + tmp->size)
                        if (end > tmp->addr)
                                end = tmp->addr;
        }
        if (start < end) {
                vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
                if (!vmm)
                        return 0;
                vmm->addr = start;
                vmm->size = end - start;
                vmm->type = KCORE_VMEMMAP;
                list_add_tail(&vmm->list, head);
        }
        return 1;
}
#else
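/* Without SPARSEMEM_VMEMMAP there is no separate vmemmap area to register. */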
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        return 1;
}

#endif

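/*
 * walk_system_ram_range() callback: turn a System RAM pfn range into a
 * KCORE_RAM entry covering its direct-mapped virtual range, clipping
 * anything that wraps the address space or runs into the vmalloc area,
 * then register the matching vmemmap range as well.
 */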
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
        struct list_head *head = (struct list_head *)arg;
        struct kcore_list *ent;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
        ent->size = nr_pages << PAGE_SHIFT;

        /* Sanity check: on 32-bit, __va() can wrap for very high pfns. */
        if (ent->addr < (unsigned long) __va(0))
                goto free_out;

        /* Trim the range so it does not wrap past ULONG_MAX (from ppc32 code). */
        if (ULONG_MAX - ent->addr < ent->size)
                ent->size = ULONG_MAX - ent->addr;

        /* Trim where the vmalloc area sits above the direct-map area. */
        if (VMALLOC_START > (unsigned long)__va(0)) {
                if (ent->addr > VMALLOC_START)
                        goto free_out;
                if (VMALLOC_START - ent->addr < ent->size)
                        ent->size = VMALLOC_START - ent->addr;
        }

        ent->type = KCORE_RAM;
        list_add_tail(&ent->list, head);

        if (!get_sparsemem_vmemmap_info(ent, head)) {
                list_del(&ent->list);
                goto free_out;
        }

        return 0;
free_out:
        kfree(ent);
        return 1;
}

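/*
 * Rebuild the KCORE_RAM/KCORE_VMEMMAP entries from the current memory map:
 * find the highest pfn spanned by any online node, walk the System RAM
 * resources up to it, and splice the resulting list in under kclist_lock.
 */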
static int kcore_update_ram(void)
{
        int nid, ret;
        unsigned long end_pfn;
        LIST_HEAD(head);

        /* Not initialized....update now */
        /* find out "max pfn" */
        end_pfn = 0;
        for_each_node_state(nid, N_MEMORY) {
                unsigned long node_end;
                node_end = NODE_DATA(nid)->node_start_pfn +
                        NODE_DATA(nid)->node_spanned_pages;
                if (end_pfn < node_end)
                        end_pfn = node_end;
        }
        /* scan 0 to max_pfn */
        ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
        if (ret) {
                free_kclist_ents(&head);
                return -ENOMEM;
        }
        __kcore_update_ram(&head);
        return ret;
}
#endif /* CONFIG_HIGHMEM */

/*****************************************************************************/
/*
 * determine size of ELF note
 */
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup((strlen(en->name) + 1), 4);
        sz += roundup(en->datasz, 4);

        return sz;
} /* end notesize() */

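/*
 * On-disk ELF note layout: an elf_note header (n_namesz, n_descsz, n_type)
 * followed by the name string and the descriptor data, each padded to
 * 4-byte alignment; notesize() and storenote() both follow this layout.
 */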
/*****************************************************************************/
/*
 * store a note in the header buffer
 */
static char *storenote(struct memelfnote *men, char *bufp)
{
        struct elf_note en;

#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)

        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en));
        DUMP_WRITE(men->name, en.n_namesz);

        /* XXX - cast from long long to long to avoid need for libgcc.a */
        bufp = (char*) roundup((unsigned long)bufp,4);
        DUMP_WRITE(men->data, men->datasz);
        bufp = (char*) roundup((unsigned long)bufp,4);

#undef DUMP_WRITE

        return bufp;
} /* end storenote() */

/*
 * store an ELF coredump header in the supplied buffer
 * nphdr is the number of elf_phdr to insert
 */
static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
{
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
        struct elf_phdr *nhdr, *phdr;
        struct elfhdr *elf;
        struct memelfnote notes[3];
        off_t offset = 0;
        struct kcore_list *m;

        /* setup ELF header */
        elf = (struct elfhdr *) bufp;
        bufp += sizeof(struct elfhdr);
        offset += sizeof(struct elfhdr);
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS]  = ELF_CLASS;
        elf->e_ident[EI_DATA]   = ELF_DATA;
        elf->e_ident[EI_VERSION]= EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;
        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
        elf->e_type     = ET_CORE;
        elf->e_machine  = ELF_ARCH;
        elf->e_version  = EV_CURRENT;
        elf->e_entry    = 0;
        elf->e_phoff    = sizeof(struct elfhdr);
        elf->e_shoff    = 0;
        elf->e_flags    = ELF_CORE_EFLAGS;
        elf->e_ehsize   = sizeof(struct elfhdr);
        elf->e_phentsize= sizeof(struct elf_phdr);
        elf->e_phnum    = nphdr;
        elf->e_shentsize= 0;
        elf->e_shnum    = 0;
        elf->e_shstrndx = 0;

        /* setup ELF PT_NOTE program header */
        nhdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        nhdr->p_type    = PT_NOTE;
        nhdr->p_offset  = 0;
        nhdr->p_vaddr   = 0;
        nhdr->p_paddr   = 0;
        nhdr->p_filesz  = 0;
        nhdr->p_memsz   = 0;
        nhdr->p_flags   = 0;
        nhdr->p_align   = 0;

        /* setup ELF PT_LOAD program header for every area */
        list_for_each_entry(m, &kclist_head, list) {
                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);

                phdr->p_type    = PT_LOAD;
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
                phdr->p_vaddr   = (size_t)m->addr;
                if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
                        phdr->p_paddr   = __pa(m->addr);
                else
                        phdr->p_paddr   = (elf_addr_t)-1;
                phdr->p_filesz  = phdr->p_memsz = m->size;
                phdr->p_align   = PAGE_SIZE;
        }

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */
        nhdr->p_offset  = offset;

        /* set up the process status */
        notes[0].name = CORE_STR;
        notes[0].type = NT_PRSTATUS;
        notes[0].datasz = sizeof(struct elf_prstatus);
        notes[0].data = &prstatus;

        memset(&prstatus, 0, sizeof(struct elf_prstatus));

        nhdr->p_filesz  = notesize(&notes[0]);
        bufp = storenote(&notes[0], bufp);

        /* set up the process info */
        notes[1].name   = CORE_STR;
        notes[1].type   = NT_PRPSINFO;
        notes[1].datasz = sizeof(struct elf_prpsinfo);
        notes[1].data   = &prpsinfo;

        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
        prpsinfo.pr_state       = 0;
        prpsinfo.pr_sname       = 'R';
        prpsinfo.pr_zomb        = 0;

        strcpy(prpsinfo.pr_fname, "vmlinux");
        strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);

        nhdr->p_filesz  += notesize(&notes[1]);
        bufp = storenote(&notes[1], bufp);

        /* set up the task structure */
        notes[2].name   = CORE_STR;
        notes[2].type   = NT_TASKSTRUCT;
        notes[2].datasz = sizeof(struct task_struct);
        notes[2].data   = current;

        nhdr->p_filesz  += notesize(&notes[2]);
        bufp = storenote(&notes[2], bufp);

} /* end elf_kcore_store_hdr() */

/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory
 */
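/*
 * File layout: a page-aligned region of elf_buflen bytes holding the
 * synthetic ELF header, program headers and notes, followed by the memory
 * image; offsets past the header map to virtual addresses via
 * kc_offset_to_vaddr(*fpos - elf_buflen). Userspace sees a valid ELF core
 * (e.g. readelf -l /proc/kcore lists one PT_LOAD per registered range).
 */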
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
        ssize_t acc = 0;
        size_t size, tsz;
        size_t elf_buflen;
        int nphdr;
        unsigned long start;

        read_lock(&kclist_lock);
        size = get_kcore_size(&nphdr, &elf_buflen);

        if (buflen == 0 || *fpos >= size) {
                read_unlock(&kclist_lock);
                return 0;
        }

        /* trim buflen to not go beyond EOF */
        if (buflen > size - *fpos)
                buflen = size - *fpos;

        /* construct an ELF core header if we'll need some of it */
        if (*fpos < elf_buflen) {
                char *elf_buf;

                tsz = elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;
                elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
                if (!elf_buf) {
                        read_unlock(&kclist_lock);
                        return -ENOMEM;
                }
                elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
                read_unlock(&kclist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
                        return -EFAULT;
                }
                kfree(elf_buf);
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* return now if we already filled the buffer */
                if (buflen == 0)
                        return acc;
        } else
                read_unlock(&kclist_lock);

        /*
         * Check to see if our file offset matches with any of
         * the addresses in the elf_phdr on our list.
         */
        start = kc_offset_to_vaddr(*fpos - elf_buflen);
        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
                tsz = buflen;

        while (buflen) {
                struct kcore_list *m;

                read_lock(&kclist_lock);
                list_for_each_entry(m, &kclist_head, list) {
                        if (start >= m->addr && start < (m->addr+m->size))
                                break;
                }
                read_unlock(&kclist_lock);

                if (&m->list == &kclist_head) {
                        if (clear_user(buffer, tsz))
                                return -EFAULT;
                } else if (is_vmalloc_or_module_addr((void *)start)) {
                        char *elf_buf;

                        elf_buf = kzalloc(tsz, GFP_KERNEL);
                        if (!elf_buf)
                                return -ENOMEM;
                        vread(elf_buf, (char *)start, tsz);
                        /* we have to zero-fill the user buffer even if nothing was read */
                        if (copy_to_user(buffer, elf_buf, tsz)) {
                                kfree(elf_buf);
                                return -EFAULT;
                        }
                        kfree(elf_buf);
                } else {
                        if (kern_addr_valid(start)) {
                                unsigned long n;

                                n = copy_to_user(buffer, (char *)start, tsz);
                                /*
                                 * We cannot distinguish between fault on source
                                 * and fault on destination. When this happens
                                 * we clear too and hope it will trigger the
                                 * EFAULT again.
                                 */
                                if (n) {
                                        if (clear_user(buffer + tsz - n, n))
                                                return -EFAULT;
                                }
                        } else {
                                if (clear_user(buffer, tsz))
                                        return -EFAULT;
                        }
                }
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;
                start += tsz;
                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
        }

        return acc;
}

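/*
 * Opening /proc/kcore requires CAP_SYS_RAWIO. If memory was hotplugged
 * since the last open, rebuild the RAM entries and refresh the inode size
 * so userspace sees a consistent file length.
 */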
static int open_kcore(struct inode *inode, struct file *filp)
{
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
                mutex_lock(&inode->i_mutex);
                i_size_write(inode, proc_root_kcore->size);
                mutex_unlock(&inode->i_mutex);
        }
        return 0;
}

static const struct file_operations proc_kcore_operations = {
        .read           = read_kcore,
        .open           = open_kcore,
        .llseek         = default_llseek,
};

/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
                                    unsigned long action, void *arg)
{
        switch (action) {
        case MEM_ONLINE:
        case MEM_OFFLINE:
                write_lock(&kclist_lock);
                kcore_need_update = 1;
                write_unlock(&kclist_lock);
        }
        return NOTIFY_OK;
}

static struct notifier_block kcore_callback_nb __meminitdata = {
        .notifier_call = kcore_callback,
        .priority = 0,
};

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If defined, the kernel text is mapped through a special segment rather
 * than the direct-map area, so it must be exposed as its own KCORE_TEXT
 * entry.
 */
static void __init proc_kcore_text_init(void)
{
        kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * The module area does not overlap the [VMALLOC_START, VMALLOC_END) range,
 * so it needs its own entry.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
        kclist_add(&kcore_modules, (void *)MODULES_VADDR,
                        MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
}
#else
static void __init add_modules_range(void)
{
}
#endif

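/*
 * Create /proc/kcore first: __kcore_update_ram() (reached via the
 * kcore_update_ram() call below) stores the computed file size in
 * proc_root_kcore, so the entry must already exist.
 */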
static int __init proc_kcore_init(void)
{
        proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
                                      &proc_kcore_operations);
        if (!proc_root_kcore) {
                pr_err("couldn't create /proc/kcore\n");
                return 0; /* Always returns 0. */
        }
        /* Store text area if it's special */
        proc_kcore_text_init();
        /* Store vmalloc area */
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
        add_modules_range();
        /* Store direct-map area from physical memory map */
        kcore_update_ram();
        register_hotmemory_notifier(&kcore_callback_nb);

        return 0;
}
module_init(proc_kcore_init);