qemu/dump.c
<<
>>
Prefs
   1/*
   2 * QEMU dump
   3 *
   4 * Copyright Fujitsu, Corp. 2011, 2012
   5 *
   6 * Authors:
   7 *     Wen Congyang <wency@cn.fujitsu.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu-common.h"
  15#include "elf.h"
  16#include "cpu.h"
  17#include "exec/cpu-all.h"
  18#include "exec/hwaddr.h"
  19#include "monitor/monitor.h"
  20#include "sysemu/kvm.h"
  21#include "sysemu/dump.h"
  22#include "sysemu/sysemu.h"
  23#include "sysemu/memory_mapping.h"
  24#include "sysemu/cpus.h"
  25#include "qapi/error.h"
  26#include "qapi/qmp/qerror.h"
  27#include "qmp-commands.h"
  28
  29#include <zlib.h>
  30#ifdef CONFIG_LZO
  31#include <lzo/lzo1x.h>
  32#endif
  33#ifdef CONFIG_SNAPPY
  34#include <snappy-c.h>
  35#endif
  36#ifndef ELF_MACHINE_UNAME
  37#define ELF_MACHINE_UNAME "Unknown"
  38#endif
  39
  40uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
  41{
  42    if (s->dump_info.d_endian == ELFDATA2LSB) {
  43        val = cpu_to_le16(val);
  44    } else {
  45        val = cpu_to_be16(val);
  46    }
  47
  48    return val;
  49}
  50
  51uint32_t cpu_to_dump32(DumpState *s, uint32_t val)
  52{
  53    if (s->dump_info.d_endian == ELFDATA2LSB) {
  54        val = cpu_to_le32(val);
  55    } else {
  56        val = cpu_to_be32(val);
  57    }
  58
  59    return val;
  60}
  61
  62uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
  63{
  64    if (s->dump_info.d_endian == ELFDATA2LSB) {
  65        val = cpu_to_le64(val);
  66    } else {
  67        val = cpu_to_be64(val);
  68    }
  69
  70    return val;
  71}
  72
  73static int dump_cleanup(DumpState *s)
  74{
  75    guest_phys_blocks_free(&s->guest_phys_blocks);
  76    memory_mapping_list_free(&s->list);
  77    close(s->fd);
  78    if (s->resume) {
  79        vm_start();
  80    }
  81
  82    return 0;
  83}
  84
  85static void dump_error(DumpState *s, const char *reason, Error **errp)
  86{
  87    dump_cleanup(s);
  88    error_setg(errp, "%s", reason);
  89}
  90
  91static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
  92{
  93    DumpState *s = opaque;
  94    size_t written_size;
  95
  96    written_size = qemu_write_full(s->fd, buf, size);
  97    if (written_size != size) {
  98        return -1;
  99    }
 100
 101    return 0;
 102}
 103
 104static void write_elf64_header(DumpState *s, Error **errp)
 105{
 106    Elf64_Ehdr elf_header;
 107    int ret;
 108
 109    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
 110    memcpy(&elf_header, ELFMAG, SELFMAG);
 111    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
 112    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
 113    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
 114    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
 115    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
 116    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
 117    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
 118    elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
 119    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
 120    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
 121    if (s->have_section) {
 122        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
 123
 124        elf_header.e_shoff = cpu_to_dump64(s, shoff);
 125        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
 126        elf_header.e_shnum = cpu_to_dump16(s, 1);
 127    }
 128
 129    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
 130    if (ret < 0) {
 131        dump_error(s, "dump: failed to write elf header", errp);
 132    }
 133}
 134
 135static void write_elf32_header(DumpState *s, Error **errp)
 136{
 137    Elf32_Ehdr elf_header;
 138    int ret;
 139
 140    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
 141    memcpy(&elf_header, ELFMAG, SELFMAG);
 142    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
 143    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
 144    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
 145    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
 146    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
 147    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
 148    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
 149    elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
 150    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
 151    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
 152    if (s->have_section) {
 153        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
 154
 155        elf_header.e_shoff = cpu_to_dump32(s, shoff);
 156        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
 157        elf_header.e_shnum = cpu_to_dump16(s, 1);
 158    }
 159
 160    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
 161    if (ret < 0) {
 162        dump_error(s, "dump: failed to write elf header", errp);
 163    }
 164}
 165
 166static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
 167                             int phdr_index, hwaddr offset,
 168                             hwaddr filesz, Error **errp)
 169{
 170    Elf64_Phdr phdr;
 171    int ret;
 172
 173    memset(&phdr, 0, sizeof(Elf64_Phdr));
 174    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
 175    phdr.p_offset = cpu_to_dump64(s, offset);
 176    phdr.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr);
 177    phdr.p_filesz = cpu_to_dump64(s, filesz);
 178    phdr.p_memsz = cpu_to_dump64(s, memory_mapping->length);
 179    phdr.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr);
 180
 181    assert(memory_mapping->length >= filesz);
 182
 183    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
 184    if (ret < 0) {
 185        dump_error(s, "dump: failed to write program header table", errp);
 186    }
 187}
 188
 189static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
 190                             int phdr_index, hwaddr offset,
 191                             hwaddr filesz, Error **errp)
 192{
 193    Elf32_Phdr phdr;
 194    int ret;
 195
 196    memset(&phdr, 0, sizeof(Elf32_Phdr));
 197    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
 198    phdr.p_offset = cpu_to_dump32(s, offset);
 199    phdr.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr);
 200    phdr.p_filesz = cpu_to_dump32(s, filesz);
 201    phdr.p_memsz = cpu_to_dump32(s, memory_mapping->length);
 202    phdr.p_vaddr = cpu_to_dump32(s, memory_mapping->virt_addr);
 203
 204    assert(memory_mapping->length >= filesz);
 205
 206    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
 207    if (ret < 0) {
 208        dump_error(s, "dump: failed to write program header table", errp);
 209    }
 210}
 211
 212static void write_elf64_note(DumpState *s, Error **errp)
 213{
 214    Elf64_Phdr phdr;
 215    hwaddr begin = s->memory_offset - s->note_size;
 216    int ret;
 217
 218    memset(&phdr, 0, sizeof(Elf64_Phdr));
 219    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
 220    phdr.p_offset = cpu_to_dump64(s, begin);
 221    phdr.p_paddr = 0;
 222    phdr.p_filesz = cpu_to_dump64(s, s->note_size);
 223    phdr.p_memsz = cpu_to_dump64(s, s->note_size);
 224    phdr.p_vaddr = 0;
 225
 226    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
 227    if (ret < 0) {
 228        dump_error(s, "dump: failed to write program header table", errp);
 229    }
 230}
 231
 232static inline int cpu_index(CPUState *cpu)
 233{
 234    return cpu->cpu_index + 1;
 235}
 236
 237static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
 238                              Error **errp)
 239{
 240    CPUState *cpu;
 241    int ret;
 242    int id;
 243
 244    CPU_FOREACH(cpu) {
 245        id = cpu_index(cpu);
 246        ret = cpu_write_elf64_note(f, cpu, id, s);
 247        if (ret < 0) {
 248            dump_error(s, "dump: failed to write elf notes", errp);
 249            return;
 250        }
 251    }
 252
 253    CPU_FOREACH(cpu) {
 254        ret = cpu_write_elf64_qemunote(f, cpu, s);
 255        if (ret < 0) {
 256            dump_error(s, "dump: failed to write CPU status", errp);
 257            return;
 258        }
 259    }
 260}
 261
 262static void write_elf32_note(DumpState *s, Error **errp)
 263{
 264    hwaddr begin = s->memory_offset - s->note_size;
 265    Elf32_Phdr phdr;
 266    int ret;
 267
 268    memset(&phdr, 0, sizeof(Elf32_Phdr));
 269    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
 270    phdr.p_offset = cpu_to_dump32(s, begin);
 271    phdr.p_paddr = 0;
 272    phdr.p_filesz = cpu_to_dump32(s, s->note_size);
 273    phdr.p_memsz = cpu_to_dump32(s, s->note_size);
 274    phdr.p_vaddr = 0;
 275
 276    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
 277    if (ret < 0) {
 278        dump_error(s, "dump: failed to write program header table", errp);
 279    }
 280}
 281
 282static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
 283                              Error **errp)
 284{
 285    CPUState *cpu;
 286    int ret;
 287    int id;
 288
 289    CPU_FOREACH(cpu) {
 290        id = cpu_index(cpu);
 291        ret = cpu_write_elf32_note(f, cpu, id, s);
 292        if (ret < 0) {
 293            dump_error(s, "dump: failed to write elf notes", errp);
 294            return;
 295        }
 296    }
 297
 298    CPU_FOREACH(cpu) {
 299        ret = cpu_write_elf32_qemunote(f, cpu, s);
 300        if (ret < 0) {
 301            dump_error(s, "dump: failed to write CPU status", errp);
 302            return;
 303        }
 304    }
 305}
 306
 307static void write_elf_section(DumpState *s, int type, Error **errp)
 308{
 309    Elf32_Shdr shdr32;
 310    Elf64_Shdr shdr64;
 311    int shdr_size;
 312    void *shdr;
 313    int ret;
 314
 315    if (type == 0) {
 316        shdr_size = sizeof(Elf32_Shdr);
 317        memset(&shdr32, 0, shdr_size);
 318        shdr32.sh_info = cpu_to_dump32(s, s->sh_info);
 319        shdr = &shdr32;
 320    } else {
 321        shdr_size = sizeof(Elf64_Shdr);
 322        memset(&shdr64, 0, shdr_size);
 323        shdr64.sh_info = cpu_to_dump32(s, s->sh_info);
 324        shdr = &shdr64;
 325    }
 326
 327    ret = fd_write_vmcore(&shdr, shdr_size, s);
 328    if (ret < 0) {
 329        dump_error(s, "dump: failed to write section header table", errp);
 330    }
 331}
 332
 333static void write_data(DumpState *s, void *buf, int length, Error **errp)
 334{
 335    int ret;
 336
 337    ret = fd_write_vmcore(buf, length, s);
 338    if (ret < 0) {
 339        dump_error(s, "dump: failed to save memory", errp);
 340    }
 341}
 342
 343/* write the memory to vmcore. 1 page per I/O. */
 344static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
 345                         int64_t size, Error **errp)
 346{
 347    int64_t i;
 348    Error *local_err = NULL;
 349
 350    for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
 351        write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
 352                   TARGET_PAGE_SIZE, &local_err);
 353        if (local_err) {
 354            error_propagate(errp, local_err);
 355            return;
 356        }
 357    }
 358
 359    if ((size % TARGET_PAGE_SIZE) != 0) {
 360        write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
 361                   size % TARGET_PAGE_SIZE, &local_err);
 362        if (local_err) {
 363            error_propagate(errp, local_err);
 364            return;
 365        }
 366    }
 367}
 368
 369/* get the memory's offset and size in the vmcore */
 370static void get_offset_range(hwaddr phys_addr,
 371                             ram_addr_t mapping_length,
 372                             DumpState *s,
 373                             hwaddr *p_offset,
 374                             hwaddr *p_filesz)
 375{
 376    GuestPhysBlock *block;
 377    hwaddr offset = s->memory_offset;
 378    int64_t size_in_block, start;
 379
 380    /* When the memory is not stored into vmcore, offset will be -1 */
 381    *p_offset = -1;
 382    *p_filesz = 0;
 383
 384    if (s->has_filter) {
 385        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
 386            return;
 387        }
 388    }
 389
 390    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
 391        if (s->has_filter) {
 392            if (block->target_start >= s->begin + s->length ||
 393                block->target_end <= s->begin) {
 394                /* This block is out of the range */
 395                continue;
 396            }
 397
 398            if (s->begin <= block->target_start) {
 399                start = block->target_start;
 400            } else {
 401                start = s->begin;
 402            }
 403
 404            size_in_block = block->target_end - start;
 405            if (s->begin + s->length < block->target_end) {
 406                size_in_block -= block->target_end - (s->begin + s->length);
 407            }
 408        } else {
 409            start = block->target_start;
 410            size_in_block = block->target_end - block->target_start;
 411        }
 412
 413        if (phys_addr >= start && phys_addr < start + size_in_block) {
 414            *p_offset = phys_addr - start + offset;
 415
 416            /* The offset range mapped from the vmcore file must not spill over
 417             * the GuestPhysBlock, clamp it. The rest of the mapping will be
 418             * zero-filled in memory at load time; see
 419             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
 420             */
 421            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
 422                        mapping_length :
 423                        size_in_block - (phys_addr - start);
 424            return;
 425        }
 426
 427        offset += size_in_block;
 428    }
 429}
 430
 431static void write_elf_loads(DumpState *s, Error **errp)
 432{
 433    hwaddr offset, filesz;
 434    MemoryMapping *memory_mapping;
 435    uint32_t phdr_index = 1;
 436    uint32_t max_index;
 437    Error *local_err = NULL;
 438
 439    if (s->have_section) {
 440        max_index = s->sh_info;
 441    } else {
 442        max_index = s->phdr_num;
 443    }
 444
 445    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
 446        get_offset_range(memory_mapping->phys_addr,
 447                         memory_mapping->length,
 448                         s, &offset, &filesz);
 449        if (s->dump_info.d_class == ELFCLASS64) {
 450            write_elf64_load(s, memory_mapping, phdr_index++, offset,
 451                             filesz, &local_err);
 452        } else {
 453            write_elf32_load(s, memory_mapping, phdr_index++, offset,
 454                             filesz, &local_err);
 455        }
 456
 457        if (local_err) {
 458            error_propagate(errp, local_err);
 459            return;
 460        }
 461
 462        if (phdr_index >= max_index) {
 463            break;
 464        }
 465    }
 466}
 467
 468/* write elf header, PT_NOTE and elf note to vmcore. */
 469static void dump_begin(DumpState *s, Error **errp)
 470{
 471    Error *local_err = NULL;
 472
 473    /*
 474     * the vmcore's format is:
 475     *   --------------
 476     *   |  elf header |
 477     *   --------------
 478     *   |  PT_NOTE    |
 479     *   --------------
 480     *   |  PT_LOAD    |
 481     *   --------------
 482     *   |  ......     |
 483     *   --------------
 484     *   |  PT_LOAD    |
 485     *   --------------
 486     *   |  sec_hdr    |
 487     *   --------------
 488     *   |  elf note   |
 489     *   --------------
 490     *   |  memory     |
 491     *   --------------
 492     *
 493     * we only know where the memory is saved after we write elf note into
 494     * vmcore.
 495     */
 496
 497    /* write elf header to vmcore */
 498    if (s->dump_info.d_class == ELFCLASS64) {
 499        write_elf64_header(s, &local_err);
 500    } else {
 501        write_elf32_header(s, &local_err);
 502    }
 503    if (local_err) {
 504        error_propagate(errp, local_err);
 505        return;
 506    }
 507
 508    if (s->dump_info.d_class == ELFCLASS64) {
 509        /* write PT_NOTE to vmcore */
 510        write_elf64_note(s, &local_err);
 511        if (local_err) {
 512            error_propagate(errp, local_err);
 513            return;
 514        }
 515
 516        /* write all PT_LOAD to vmcore */
 517        write_elf_loads(s, &local_err);
 518        if (local_err) {
 519            error_propagate(errp, local_err);
 520            return;
 521        }
 522
 523        /* write section to vmcore */
 524        if (s->have_section) {
 525            write_elf_section(s, 1, &local_err);
 526            if (local_err) {
 527                error_propagate(errp, local_err);
 528                return;
 529            }
 530        }
 531
 532        /* write notes to vmcore */
 533        write_elf64_notes(fd_write_vmcore, s, &local_err);
 534        if (local_err) {
 535            error_propagate(errp, local_err);
 536            return;
 537        }
 538    } else {
 539        /* write PT_NOTE to vmcore */
 540        write_elf32_note(s, &local_err);
 541        if (local_err) {
 542            error_propagate(errp, local_err);
 543            return;
 544        }
 545
 546        /* write all PT_LOAD to vmcore */
 547        write_elf_loads(s, &local_err);
 548        if (local_err) {
 549            error_propagate(errp, local_err);
 550            return;
 551        }
 552
 553        /* write section to vmcore */
 554        if (s->have_section) {
 555            write_elf_section(s, 0, &local_err);
 556            if (local_err) {
 557                error_propagate(errp, local_err);
 558                return;
 559            }
 560        }
 561
 562        /* write notes to vmcore */
 563        write_elf32_notes(fd_write_vmcore, s, &local_err);
 564        if (local_err) {
 565            error_propagate(errp, local_err);
 566            return;
 567        }
 568    }
 569}
 570
 571static void dump_completed(DumpState *s)
 572{
 573    dump_cleanup(s);
 574}
 575
 576static int get_next_block(DumpState *s, GuestPhysBlock *block)
 577{
 578    while (1) {
 579        block = QTAILQ_NEXT(block, next);
 580        if (!block) {
 581            /* no more block */
 582            return 1;
 583        }
 584
 585        s->start = 0;
 586        s->next_block = block;
 587        if (s->has_filter) {
 588            if (block->target_start >= s->begin + s->length ||
 589                block->target_end <= s->begin) {
 590                /* This block is out of the range */
 591                continue;
 592            }
 593
 594            if (s->begin > block->target_start) {
 595                s->start = s->begin - block->target_start;
 596            }
 597        }
 598
 599        return 0;
 600    }
 601}
 602
 603/* write all memory to vmcore */
 604static void dump_iterate(DumpState *s, Error **errp)
 605{
 606    GuestPhysBlock *block;
 607    int64_t size;
 608    Error *local_err = NULL;
 609
 610    do {
 611        block = s->next_block;
 612
 613        size = block->target_end - block->target_start;
 614        if (s->has_filter) {
 615            size -= s->start;
 616            if (s->begin + s->length < block->target_end) {
 617                size -= block->target_end - (s->begin + s->length);
 618            }
 619        }
 620        write_memory(s, block, s->start, size, &local_err);
 621        if (local_err) {
 622            error_propagate(errp, local_err);
 623            return;
 624        }
 625
 626    } while (!get_next_block(s, block));
 627
 628    dump_completed(s);
 629}
 630
 631static void create_vmcore(DumpState *s, Error **errp)
 632{
 633    Error *local_err = NULL;
 634
 635    dump_begin(s, &local_err);
 636    if (local_err) {
 637        error_propagate(errp, local_err);
 638        return;
 639    }
 640
 641    dump_iterate(s, errp);
 642}
 643
 644static int write_start_flat_header(int fd)
 645{
 646    MakedumpfileHeader *mh;
 647    int ret = 0;
 648
 649    QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
 650    mh = g_malloc0(MAX_SIZE_MDF_HEADER);
 651
 652    memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
 653           MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));
 654
 655    mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
 656    mh->version = cpu_to_be64(VERSION_FLAT_HEADER);
 657
 658    size_t written_size;
 659    written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
 660    if (written_size != MAX_SIZE_MDF_HEADER) {
 661        ret = -1;
 662    }
 663
 664    g_free(mh);
 665    return ret;
 666}
 667
 668static int write_end_flat_header(int fd)
 669{
 670    MakedumpfileDataHeader mdh;
 671
 672    mdh.offset = END_FLAG_FLAT_HEADER;
 673    mdh.buf_size = END_FLAG_FLAT_HEADER;
 674
 675    size_t written_size;
 676    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
 677    if (written_size != sizeof(mdh)) {
 678        return -1;
 679    }
 680
 681    return 0;
 682}
 683
 684static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
 685{
 686    size_t written_size;
 687    MakedumpfileDataHeader mdh;
 688
 689    mdh.offset = cpu_to_be64(offset);
 690    mdh.buf_size = cpu_to_be64(size);
 691
 692    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
 693    if (written_size != sizeof(mdh)) {
 694        return -1;
 695    }
 696
 697    written_size = qemu_write_full(fd, buf, size);
 698    if (written_size != size) {
 699        return -1;
 700    }
 701
 702    return 0;
 703}
 704
 705static int buf_write_note(const void *buf, size_t size, void *opaque)
 706{
 707    DumpState *s = opaque;
 708
 709    /* note_buf is not enough */
 710    if (s->note_buf_offset + size > s->note_size) {
 711        return -1;
 712    }
 713
 714    memcpy(s->note_buf + s->note_buf_offset, buf, size);
 715
 716    s->note_buf_offset += size;
 717
 718    return 0;
 719}
 720
 721/* write common header, sub header and elf note to vmcore */
 722static void create_header32(DumpState *s, Error **errp)
 723{
 724    DiskDumpHeader32 *dh = NULL;
 725    KdumpSubHeader32 *kh = NULL;
 726    size_t size;
 727    uint32_t block_size;
 728    uint32_t sub_hdr_size;
 729    uint32_t bitmap_blocks;
 730    uint32_t status = 0;
 731    uint64_t offset_note;
 732    Error *local_err = NULL;
 733
 734    /* write common header, the version of kdump-compressed format is 6th */
 735    size = sizeof(DiskDumpHeader32);
 736    dh = g_malloc0(size);
 737
 738    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
 739    dh->header_version = cpu_to_dump32(s, 6);
 740    block_size = TARGET_PAGE_SIZE;
 741    dh->block_size = cpu_to_dump32(s, block_size);
 742    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
 743    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
 744    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
 745    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
 746    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
 747    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
 748    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
 749    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
 750    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));
 751
 752    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
 753        status |= DUMP_DH_COMPRESSED_ZLIB;
 754    }
 755#ifdef CONFIG_LZO
 756    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
 757        status |= DUMP_DH_COMPRESSED_LZO;
 758    }
 759#endif
 760#ifdef CONFIG_SNAPPY
 761    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
 762        status |= DUMP_DH_COMPRESSED_SNAPPY;
 763    }
 764#endif
 765    dh->status = cpu_to_dump32(s, status);
 766
 767    if (write_buffer(s->fd, 0, dh, size) < 0) {
 768        dump_error(s, "dump: failed to write disk dump header", errp);
 769        goto out;
 770    }
 771
 772    /* write sub header */
 773    size = sizeof(KdumpSubHeader32);
 774    kh = g_malloc0(size);
 775
 776    /* 64bit max_mapnr_64 */
 777    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
 778    kh->phys_base = cpu_to_dump32(s, PHYS_BASE);
 779    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);
 780
 781    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
 782    kh->offset_note = cpu_to_dump64(s, offset_note);
 783    kh->note_size = cpu_to_dump32(s, s->note_size);
 784
 785    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
 786                     block_size, kh, size) < 0) {
 787        dump_error(s, "dump: failed to write kdump sub header", errp);
 788        goto out;
 789    }
 790
 791    /* write note */
 792    s->note_buf = g_malloc0(s->note_size);
 793    s->note_buf_offset = 0;
 794
 795    /* use s->note_buf to store notes temporarily */
 796    write_elf32_notes(buf_write_note, s, &local_err);
 797    if (local_err) {
 798        error_propagate(errp, local_err);
 799        goto out;
 800    }
 801    if (write_buffer(s->fd, offset_note, s->note_buf,
 802                     s->note_size) < 0) {
 803        dump_error(s, "dump: failed to write notes", errp);
 804        goto out;
 805    }
 806
 807    /* get offset of dump_bitmap */
 808    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
 809                             block_size;
 810
 811    /* get offset of page */
 812    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
 813                     block_size;
 814
 815out:
 816    g_free(dh);
 817    g_free(kh);
 818    g_free(s->note_buf);
 819}
 820
 821/* write common header, sub header and elf note to vmcore */
 822static void create_header64(DumpState *s, Error **errp)
 823{
 824    DiskDumpHeader64 *dh = NULL;
 825    KdumpSubHeader64 *kh = NULL;
 826    size_t size;
 827    uint32_t block_size;
 828    uint32_t sub_hdr_size;
 829    uint32_t bitmap_blocks;
 830    uint32_t status = 0;
 831    uint64_t offset_note;
 832    Error *local_err = NULL;
 833
 834    /* write common header, the version of kdump-compressed format is 6th */
 835    size = sizeof(DiskDumpHeader64);
 836    dh = g_malloc0(size);
 837
 838    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
 839    dh->header_version = cpu_to_dump32(s, 6);
 840    block_size = TARGET_PAGE_SIZE;
 841    dh->block_size = cpu_to_dump32(s, block_size);
 842    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
 843    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
 844    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
 845    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
 846    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
 847    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
 848    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
 849    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
 850    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));
 851
 852    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
 853        status |= DUMP_DH_COMPRESSED_ZLIB;
 854    }
 855#ifdef CONFIG_LZO
 856    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
 857        status |= DUMP_DH_COMPRESSED_LZO;
 858    }
 859#endif
 860#ifdef CONFIG_SNAPPY
 861    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
 862        status |= DUMP_DH_COMPRESSED_SNAPPY;
 863    }
 864#endif
 865    dh->status = cpu_to_dump32(s, status);
 866
 867    if (write_buffer(s->fd, 0, dh, size) < 0) {
 868        dump_error(s, "dump: failed to write disk dump header", errp);
 869        goto out;
 870    }
 871
 872    /* write sub header */
 873    size = sizeof(KdumpSubHeader64);
 874    kh = g_malloc0(size);
 875
 876    /* 64bit max_mapnr_64 */
 877    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
 878    kh->phys_base = cpu_to_dump64(s, PHYS_BASE);
 879    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);
 880
 881    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
 882    kh->offset_note = cpu_to_dump64(s, offset_note);
 883    kh->note_size = cpu_to_dump64(s, s->note_size);
 884
 885    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
 886                     block_size, kh, size) < 0) {
 887        dump_error(s, "dump: failed to write kdump sub header", errp);
 888        goto out;
 889    }
 890
 891    /* write note */
 892    s->note_buf = g_malloc0(s->note_size);
 893    s->note_buf_offset = 0;
 894
 895    /* use s->note_buf to store notes temporarily */
 896    write_elf64_notes(buf_write_note, s, &local_err);
 897    if (local_err) {
 898        error_propagate(errp, local_err);
 899        goto out;
 900    }
 901
 902    if (write_buffer(s->fd, offset_note, s->note_buf,
 903                     s->note_size) < 0) {
 904        dump_error(s, "dump: failed to write notes", errp);
 905        goto out;
 906    }
 907
 908    /* get offset of dump_bitmap */
 909    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
 910                             block_size;
 911
 912    /* get offset of page */
 913    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
 914                     block_size;
 915
 916out:
 917    g_free(dh);
 918    g_free(kh);
 919    g_free(s->note_buf);
 920}
 921
 922static void write_dump_header(DumpState *s, Error **errp)
 923{
 924     Error *local_err = NULL;
 925
 926    if (s->dump_info.d_class == ELFCLASS32) {
 927        create_header32(s, &local_err);
 928    } else {
 929        create_header64(s, &local_err);
 930    }
 931    if (local_err) {
 932        error_propagate(errp, local_err);
 933    }
 934}
 935
 936/*
 937 * set dump_bitmap sequencely. the bit before last_pfn is not allowed to be
 938 * rewritten, so if need to set the first bit, set last_pfn and pfn to 0.
 939 * set_dump_bitmap will always leave the recently set bit un-sync. And setting
 940 * (last bit + sizeof(buf) * 8) to 0 will do flushing the content in buf into
 941 * vmcore, ie. synchronizing un-sync bit into vmcore.
 942 */
 943static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
 944                           uint8_t *buf, DumpState *s)
 945{
 946    off_t old_offset, new_offset;
 947    off_t offset_bitmap1, offset_bitmap2;
 948    uint32_t byte, bit;
 949
 950    /* should not set the previous place */
 951    assert(last_pfn <= pfn);
 952
 953    /*
 954     * if the bit needed to be set is not cached in buf, flush the data in buf
 955     * to vmcore firstly.
 956     * making new_offset be bigger than old_offset can also sync remained data
 957     * into vmcore.
 958     */
 959    old_offset = BUFSIZE_BITMAP * (last_pfn / PFN_BUFBITMAP);
 960    new_offset = BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
 961
 962    while (old_offset < new_offset) {
 963        /* calculate the offset and write dump_bitmap */
 964        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
 965        if (write_buffer(s->fd, offset_bitmap1, buf,
 966                         BUFSIZE_BITMAP) < 0) {
 967            return -1;
 968        }
 969
 970        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
 971        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
 972                         old_offset;
 973        if (write_buffer(s->fd, offset_bitmap2, buf,
 974                         BUFSIZE_BITMAP) < 0) {
 975            return -1;
 976        }
 977
 978        memset(buf, 0, BUFSIZE_BITMAP);
 979        old_offset += BUFSIZE_BITMAP;
 980    }
 981
 982    /* get the exact place of the bit in the buf, and set it */
 983    byte = (pfn % PFN_BUFBITMAP) / CHAR_BIT;
 984    bit = (pfn % PFN_BUFBITMAP) % CHAR_BIT;
 985    if (value) {
 986        buf[byte] |= 1u << bit;
 987    } else {
 988        buf[byte] &= ~(1u << bit);
 989    }
 990
 991    return 0;
 992}
 993
 994/*
 995 * exam every page and return the page frame number and the address of the page.
 996 * bufptr can be NULL. note: the blocks here is supposed to reflect guest-phys
 997 * blocks, so block->target_start and block->target_end should be interal
 998 * multiples of the target page size.
 999 */
1000static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
1001                          uint8_t **bufptr, DumpState *s)
1002{
1003    GuestPhysBlock *block = *blockptr;
1004    hwaddr addr;
1005    uint8_t *buf;
1006
1007    /* block == NULL means the start of the iteration */
1008    if (!block) {
1009        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
1010        *blockptr = block;
1011        assert((block->target_start & ~TARGET_PAGE_MASK) == 0);
1012        assert((block->target_end & ~TARGET_PAGE_MASK) == 0);
1013        *pfnptr = paddr_to_pfn(block->target_start);
1014        if (bufptr) {
1015            *bufptr = block->host_addr;
1016        }
1017        return true;
1018    }
1019
1020    *pfnptr = *pfnptr + 1;
1021    addr = pfn_to_paddr(*pfnptr);
1022
1023    if ((addr >= block->target_start) &&
1024        (addr + TARGET_PAGE_SIZE <= block->target_end)) {
1025        buf = block->host_addr + (addr - block->target_start);
1026    } else {
1027        /* the next page is in the next block */
1028        block = QTAILQ_NEXT(block, next);
1029        *blockptr = block;
1030        if (!block) {
1031            return false;
1032        }
1033        assert((block->target_start & ~TARGET_PAGE_MASK) == 0);
1034        assert((block->target_end & ~TARGET_PAGE_MASK) == 0);
1035        *pfnptr = paddr_to_pfn(block->target_start);
1036        buf = block->host_addr;
1037    }
1038
1039    if (bufptr) {
1040        *bufptr = buf;
1041    }
1042
1043    return true;
1044}
1045
1046static void write_dump_bitmap(DumpState *s, Error **errp)
1047{
1048    int ret = 0;
1049    uint64_t last_pfn, pfn;
1050    void *dump_bitmap_buf;
1051    size_t num_dumpable;
1052    GuestPhysBlock *block_iter = NULL;
1053
1054    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
1055    dump_bitmap_buf = g_malloc0(BUFSIZE_BITMAP);
1056
1057    num_dumpable = 0;
1058    last_pfn = 0;
1059
1060    /*
1061     * exam memory page by page, and set the bit in dump_bitmap corresponded
1062     * to the existing page.
1063     */
1064    while (get_next_page(&block_iter, &pfn, NULL, s)) {
1065        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
1066        if (ret < 0) {
1067            dump_error(s, "dump: failed to set dump_bitmap", errp);
1068            goto out;
1069        }
1070
1071        last_pfn = pfn;
1072        num_dumpable++;
1073    }
1074
1075    /*
1076     * set_dump_bitmap will always leave the recently set bit un-sync. Here we
1077     * set last_pfn + PFN_BUFBITMAP to 0 and those set but un-sync bit will be
1078     * synchronized into vmcore.
1079     */
1080    if (num_dumpable > 0) {
1081        ret = set_dump_bitmap(last_pfn, last_pfn + PFN_BUFBITMAP, false,
1082                              dump_bitmap_buf, s);
1083        if (ret < 0) {
1084            dump_error(s, "dump: failed to sync dump_bitmap", errp);
1085            goto out;
1086        }
1087    }
1088
1089    /* number of dumpable pages that will be dumped later */
1090    s->num_dumpable = num_dumpable;
1091
1092out:
1093    g_free(dump_bitmap_buf);
1094}
1095
1096static void prepare_data_cache(DataCache *data_cache, DumpState *s,
1097                               off_t offset)
1098{
1099    data_cache->fd = s->fd;
1100    data_cache->data_size = 0;
1101    data_cache->buf_size = BUFSIZE_DATA_CACHE;
1102    data_cache->buf = g_malloc0(BUFSIZE_DATA_CACHE);
1103    data_cache->offset = offset;
1104}
1105
1106static int write_cache(DataCache *dc, const void *buf, size_t size,
1107                       bool flag_sync)
1108{
1109    /*
1110     * dc->buf_size should not be less than size, otherwise dc will never be
1111     * enough
1112     */
1113    assert(size <= dc->buf_size);
1114
1115    /*
1116     * if flag_sync is set, synchronize data in dc->buf into vmcore.
1117     * otherwise check if the space is enough for caching data in buf, if not,
1118     * write the data in dc->buf to dc->fd and reset dc->buf
1119     */
1120    if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
1121        (flag_sync && dc->data_size > 0)) {
1122        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
1123            return -1;
1124        }
1125
1126        dc->offset += dc->data_size;
1127        dc->data_size = 0;
1128    }
1129
1130    if (!flag_sync) {
1131        memcpy(dc->buf + dc->data_size, buf, size);
1132        dc->data_size += size;
1133    }
1134
1135    return 0;
1136}
1137
1138static void free_data_cache(DataCache *data_cache)
1139{
1140    g_free(data_cache->buf);
1141}
1142
1143static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
1144{
1145    switch (flag_compress) {
1146    case DUMP_DH_COMPRESSED_ZLIB:
1147        return compressBound(page_size);
1148
1149    case DUMP_DH_COMPRESSED_LZO:
1150        /*
1151         * LZO will expand incompressible data by a little amount. Please check
1152         * the following URL to see the expansion calculation:
1153         * http://www.oberhumer.com/opensource/lzo/lzofaq.php
1154         */
1155        return page_size + page_size / 16 + 64 + 3;
1156
1157#ifdef CONFIG_SNAPPY
1158    case DUMP_DH_COMPRESSED_SNAPPY:
1159        return snappy_max_compressed_length(page_size);
1160#endif
1161    }
1162    return 0;
1163}
1164
/*
 * Report whether buffer_is_zero() holds for the @page_size bytes at @buf.
 */
static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
{
    const bool all_zero = buffer_is_zero(buf, page_size);

    return all_zero;
}
1172
1173static void write_dump_pages(DumpState *s, Error **errp)
1174{
1175    int ret = 0;
1176    DataCache page_desc, page_data;
1177    size_t len_buf_out, size_out;
1178#ifdef CONFIG_LZO
1179    lzo_bytep wrkmem = NULL;
1180#endif
1181    uint8_t *buf_out = NULL;
1182    off_t offset_desc, offset_data;
1183    PageDescriptor pd, pd_zero;
1184    uint8_t *buf;
1185    GuestPhysBlock *block_iter = NULL;
1186    uint64_t pfn_iter;
1187
1188    /* get offset of page_desc and page_data in dump file */
1189    offset_desc = s->offset_page;
1190    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;
1191
1192    prepare_data_cache(&page_desc, s, offset_desc);
1193    prepare_data_cache(&page_data, s, offset_data);
1194
1195    /* prepare buffer to store compressed data */
1196    len_buf_out = get_len_buf_out(TARGET_PAGE_SIZE, s->flag_compress);
1197    assert(len_buf_out != 0);
1198
1199#ifdef CONFIG_LZO
1200    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
1201#endif
1202
1203    buf_out = g_malloc(len_buf_out);
1204
1205    /*
1206     * init zero page's page_desc and page_data, because every zero page
1207     * uses the same page_data
1208     */
1209    pd_zero.size = cpu_to_dump32(s, TARGET_PAGE_SIZE);
1210    pd_zero.flags = cpu_to_dump32(s, 0);
1211    pd_zero.offset = cpu_to_dump64(s, offset_data);
1212    pd_zero.page_flags = cpu_to_dump64(s, 0);
1213    buf = g_malloc0(TARGET_PAGE_SIZE);
1214    ret = write_cache(&page_data, buf, TARGET_PAGE_SIZE, false);
1215    g_free(buf);
1216    if (ret < 0) {
1217        dump_error(s, "dump: failed to write page data (zero page)", errp);
1218        goto out;
1219    }
1220
1221    offset_data += TARGET_PAGE_SIZE;
1222
1223    /*
1224     * dump memory to vmcore page by page. zero page will all be resided in the
1225     * first page of page section
1226     */
1227    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
1228        /* check zero page */
1229        if (is_zero_page(buf, TARGET_PAGE_SIZE)) {
1230            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
1231                              false);
1232            if (ret < 0) {
1233                dump_error(s, "dump: failed to write page desc", errp);
1234                goto out;
1235            }
1236        } else {
1237            /*
1238             * not zero page, then:
1239             * 1. compress the page
1240             * 2. write the compressed page into the cache of page_data
1241             * 3. get page desc of the compressed page and write it into the
1242             *    cache of page_desc
1243             *
1244             * only one compression format will be used here, for
1245             * s->flag_compress is set. But when compression fails to work,
1246             * we fall back to save in plaintext.
1247             */
1248             size_out = len_buf_out;
1249             if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
1250                    (compress2(buf_out, (uLongf *)&size_out, buf,
1251                               TARGET_PAGE_SIZE, Z_BEST_SPEED) == Z_OK) &&
1252                    (size_out < TARGET_PAGE_SIZE)) {
1253                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB);
1254                pd.size  = cpu_to_dump32(s, size_out);
1255
1256                ret = write_cache(&page_data, buf_out, size_out, false);
1257                if (ret < 0) {
1258                    dump_error(s, "dump: failed to write page data", errp);
1259                    goto out;
1260                }
1261#ifdef CONFIG_LZO
1262            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
1263                    (lzo1x_1_compress(buf, TARGET_PAGE_SIZE, buf_out,
1264                    (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
1265                    (size_out < TARGET_PAGE_SIZE)) {
1266                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO);
1267                pd.size  = cpu_to_dump32(s, size_out);
1268
1269                ret = write_cache(&page_data, buf_out, size_out, false);
1270                if (ret < 0) {
1271                    dump_error(s, "dump: failed to write page data", errp);
1272                    goto out;
1273                }
1274#endif
1275#ifdef CONFIG_SNAPPY
1276            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
1277                    (snappy_compress((char *)buf, TARGET_PAGE_SIZE,
1278                    (char *)buf_out, &size_out) == SNAPPY_OK) &&
1279                    (size_out < TARGET_PAGE_SIZE)) {
1280                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY);
1281                pd.size  = cpu_to_dump32(s, size_out);
1282
1283                ret = write_cache(&page_data, buf_out, size_out, false);
1284                if (ret < 0) {
1285                    dump_error(s, "dump: failed to write page data", errp);
1286                    goto out;
1287                }
1288#endif
1289            } else {
1290                /*
1291                 * fall back to save in plaintext, size_out should be
1292                 * assigned TARGET_PAGE_SIZE
1293                 */
1294                pd.flags = cpu_to_dump32(s, 0);
1295                size_out = TARGET_PAGE_SIZE;
1296                pd.size = cpu_to_dump32(s, size_out);
1297
1298                ret = write_cache(&page_data, buf, TARGET_PAGE_SIZE, false);
1299                if (ret < 0) {
1300                    dump_error(s, "dump: failed to write page data", errp);
1301                    goto out;
1302                }
1303            }
1304
1305            /* get and write page desc here */
1306            pd.page_flags = cpu_to_dump64(s, 0);
1307            pd.offset = cpu_to_dump64(s, offset_data);
1308            offset_data += size_out;
1309
1310            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
1311            if (ret < 0) {
1312                dump_error(s, "dump: failed to write page desc", errp);
1313                goto out;
1314            }
1315        }
1316    }
1317
1318    ret = write_cache(&page_desc, NULL, 0, true);
1319    if (ret < 0) {
1320        dump_error(s, "dump: failed to sync cache for page_desc", errp);
1321        goto out;
1322    }
1323    ret = write_cache(&page_data, NULL, 0, true);
1324    if (ret < 0) {
1325        dump_error(s, "dump: failed to sync cache for page_data", errp);
1326        goto out;
1327    }
1328
1329out:
1330    free_data_cache(&page_desc);
1331    free_data_cache(&page_data);
1332
1333#ifdef CONFIG_LZO
1334    g_free(wrkmem);
1335#endif
1336
1337    g_free(buf_out);
1338}
1339
1340static void create_kdump_vmcore(DumpState *s, Error **errp)
1341{
1342    int ret;
1343    Error *local_err = NULL;
1344
1345    /*
1346     * the kdump-compressed format is:
1347     *                                               File offset
1348     *  +------------------------------------------+ 0x0
1349     *  |    main header (struct disk_dump_header) |
1350     *  |------------------------------------------+ block 1
1351     *  |    sub header (struct kdump_sub_header)  |
1352     *  |------------------------------------------+ block 2
1353     *  |            1st-dump_bitmap               |
1354     *  |------------------------------------------+ block 2 + X blocks
1355     *  |            2nd-dump_bitmap               | (aligned by block)
1356     *  |------------------------------------------+ block 2 + 2 * X blocks
1357     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
1358     *  |  page desc for pfn 1 (struct page_desc)  |
1359     *  |                    :                     |
1360     *  |------------------------------------------| (not aligned by block)
1361     *  |         page data (pfn 0)                |
1362     *  |         page data (pfn 1)                |
1363     *  |                    :                     |
1364     *  +------------------------------------------+
1365     */
1366
1367    ret = write_start_flat_header(s->fd);
1368    if (ret < 0) {
1369        dump_error(s, "dump: failed to write start flat header", errp);
1370        return;
1371    }
1372
1373    write_dump_header(s, &local_err);
1374    if (local_err) {
1375        error_propagate(errp, local_err);
1376        return;
1377    }
1378
1379    write_dump_bitmap(s, &local_err);
1380    if (local_err) {
1381        error_propagate(errp, local_err);
1382        return;
1383    }
1384
1385    write_dump_pages(s, &local_err);
1386    if (local_err) {
1387        error_propagate(errp, local_err);
1388        return;
1389    }
1390
1391    ret = write_end_flat_header(s->fd);
1392    if (ret < 0) {
1393        dump_error(s, "dump: failed to write end flat header", errp);
1394        return;
1395    }
1396
1397    dump_completed(s);
1398}
1399
1400static ram_addr_t get_start_block(DumpState *s)
1401{
1402    GuestPhysBlock *block;
1403
1404    if (!s->has_filter) {
1405        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
1406        return 0;
1407    }
1408
1409    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
1410        if (block->target_start >= s->begin + s->length ||
1411            block->target_end <= s->begin) {
1412            /* This block is out of the range */
1413            continue;
1414        }
1415
1416        s->next_block = block;
1417        if (s->begin > block->target_start) {
1418            s->start = s->begin - block->target_start;
1419        } else {
1420            s->start = 0;
1421        }
1422        return s->start;
1423    }
1424
1425    return -1;
1426}
1427
1428static void get_max_mapnr(DumpState *s)
1429{
1430    GuestPhysBlock *last_block;
1431
1432    last_block = QTAILQ_LAST(&s->guest_phys_blocks.head, GuestPhysBlockHead);
1433    s->max_mapnr = paddr_to_pfn(last_block->target_end);
1434}
1435
1436static void dump_init(DumpState *s, int fd, bool has_format,
1437                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
1438                      int64_t begin, int64_t length, Error **errp)
1439{
1440    CPUState *cpu;
1441    int nr_cpus;
1442    Error *err = NULL;
1443    int ret;
1444
1445    /* kdump-compressed is conflict with paging and filter */
1446    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
1447        assert(!paging && !has_filter);
1448    }
1449
1450    if (runstate_is_running()) {
1451        vm_stop(RUN_STATE_SAVE_VM);
1452        s->resume = true;
1453    } else {
1454        s->resume = false;
1455    }
1456
1457    /* If we use KVM, we should synchronize the registers before we get dump
1458     * info or physmap info.
1459     */
1460    cpu_synchronize_all_states();
1461    nr_cpus = 0;
1462    CPU_FOREACH(cpu) {
1463        nr_cpus++;
1464    }
1465
1466    s->fd = fd;
1467    s->has_filter = has_filter;
1468    s->begin = begin;
1469    s->length = length;
1470
1471    memory_mapping_list_init(&s->list);
1472
1473    guest_phys_blocks_init(&s->guest_phys_blocks);
1474    guest_phys_blocks_append(&s->guest_phys_blocks);
1475
1476    s->start = get_start_block(s);
1477    if (s->start == -1) {
1478        error_setg(errp, QERR_INVALID_PARAMETER, "begin");
1479        goto cleanup;
1480    }
1481
1482    /* get dump info: endian, class and architecture.
1483     * If the target architecture is not supported, cpu_get_dump_info() will
1484     * return -1.
1485     */
1486    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
1487    if (ret < 0) {
1488        error_setg(errp, QERR_UNSUPPORTED);
1489        goto cleanup;
1490    }
1491
1492    s->note_size = cpu_get_note_size(s->dump_info.d_class,
1493                                     s->dump_info.d_machine, nr_cpus);
1494    if (s->note_size < 0) {
1495        error_setg(errp, QERR_UNSUPPORTED);
1496        goto cleanup;
1497    }
1498
1499    /* get memory mapping */
1500    if (paging) {
1501        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
1502        if (err != NULL) {
1503            error_propagate(errp, err);
1504            goto cleanup;
1505        }
1506    } else {
1507        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
1508    }
1509
1510    s->nr_cpus = nr_cpus;
1511
1512    get_max_mapnr(s);
1513
1514    uint64_t tmp;
1515    tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT), TARGET_PAGE_SIZE);
1516    s->len_dump_bitmap = tmp * TARGET_PAGE_SIZE;
1517
1518    /* init for kdump-compressed format */
1519    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
1520        switch (format) {
1521        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
1522            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
1523            break;
1524
1525        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
1526#ifdef CONFIG_LZO
1527            if (lzo_init() != LZO_E_OK) {
1528                error_setg(errp, "failed to initialize the LZO library");
1529                goto cleanup;
1530            }
1531#endif
1532            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
1533            break;
1534
1535        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
1536            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
1537            break;
1538
1539        default:
1540            s->flag_compress = 0;
1541        }
1542
1543        return;
1544    }
1545
1546    if (s->has_filter) {
1547        memory_mapping_filter(&s->list, s->begin, s->length);
1548    }
1549
1550    /*
1551     * calculate phdr_num
1552     *
1553     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
1554     */
1555    s->phdr_num = 1; /* PT_NOTE */
1556    if (s->list.num < UINT16_MAX - 2) {
1557        s->phdr_num += s->list.num;
1558        s->have_section = false;
1559    } else {
1560        s->have_section = true;
1561        s->phdr_num = PN_XNUM;
1562        s->sh_info = 1; /* PT_NOTE */
1563
1564        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
1565        if (s->list.num <= UINT32_MAX - 1) {
1566            s->sh_info += s->list.num;
1567        } else {
1568            s->sh_info = UINT32_MAX;
1569        }
1570    }
1571
1572    if (s->dump_info.d_class == ELFCLASS64) {
1573        if (s->have_section) {
1574            s->memory_offset = sizeof(Elf64_Ehdr) +
1575                               sizeof(Elf64_Phdr) * s->sh_info +
1576                               sizeof(Elf64_Shdr) + s->note_size;
1577        } else {
1578            s->memory_offset = sizeof(Elf64_Ehdr) +
1579                               sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
1580        }
1581    } else {
1582        if (s->have_section) {
1583            s->memory_offset = sizeof(Elf32_Ehdr) +
1584                               sizeof(Elf32_Phdr) * s->sh_info +
1585                               sizeof(Elf32_Shdr) + s->note_size;
1586        } else {
1587            s->memory_offset = sizeof(Elf32_Ehdr) +
1588                               sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
1589        }
1590    }
1591
1592    return;
1593
1594cleanup:
1595    dump_cleanup(s);
1596}
1597
1598void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
1599                           int64_t begin, bool has_length,
1600                           int64_t length, bool has_format,
1601                           DumpGuestMemoryFormat format, Error **errp)
1602{
1603    const char *p;
1604    int fd = -1;
1605    DumpState *s;
1606    Error *local_err = NULL;
1607
1608    /*
1609     * kdump-compressed format need the whole memory dumped, so paging or
1610     * filter is not supported here.
1611     */
1612    if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
1613        (paging || has_begin || has_length)) {
1614        error_setg(errp, "kdump-compressed format doesn't support paging or "
1615                         "filter");
1616        return;
1617    }
1618    if (has_begin && !has_length) {
1619        error_setg(errp, QERR_MISSING_PARAMETER, "length");
1620        return;
1621    }
1622    if (!has_begin && has_length) {
1623        error_setg(errp, QERR_MISSING_PARAMETER, "begin");
1624        return;
1625    }
1626
1627    /* check whether lzo/snappy is supported */
1628#ifndef CONFIG_LZO
1629    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
1630        error_setg(errp, "kdump-lzo is not available now");
1631        return;
1632    }
1633#endif
1634
1635#ifndef CONFIG_SNAPPY
1636    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
1637        error_setg(errp, "kdump-snappy is not available now");
1638        return;
1639    }
1640#endif
1641
1642#if !defined(WIN32)
1643    if (strstart(file, "fd:", &p)) {
1644        fd = monitor_get_fd(cur_mon, p, errp);
1645        if (fd == -1) {
1646            return;
1647        }
1648    }
1649#endif
1650
1651    if  (strstart(file, "file:", &p)) {
1652        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
1653        if (fd < 0) {
1654            error_setg_file_open(errp, errno, p);
1655            return;
1656        }
1657    }
1658
1659    if (fd == -1) {
1660        error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
1661        return;
1662    }
1663
1664    s = g_malloc0(sizeof(DumpState));
1665
1666    dump_init(s, fd, has_format, format, paging, has_begin,
1667              begin, length, &local_err);
1668    if (local_err) {
1669        g_free(s);
1670        error_propagate(errp, local_err);
1671        return;
1672    }
1673
1674    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
1675        create_kdump_vmcore(s, errp);
1676    } else {
1677        create_vmcore(s, errp);
1678    }
1679
1680    g_free(s);
1681}
1682
1683DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
1684{
1685    DumpGuestMemoryFormatList *item;
1686    DumpGuestMemoryCapability *cap =
1687                                  g_malloc0(sizeof(DumpGuestMemoryCapability));
1688
1689    /* elf is always available */
1690    item = g_malloc0(sizeof(DumpGuestMemoryFormatList));
1691    cap->formats = item;
1692    item->value = DUMP_GUEST_MEMORY_FORMAT_ELF;
1693
1694    /* kdump-zlib is always available */
1695    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
1696    item = item->next;
1697    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB;
1698
1699    /* add new item if kdump-lzo is available */
1700#ifdef CONFIG_LZO
1701    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
1702    item = item->next;
1703    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO;
1704#endif
1705
1706    /* add new item if kdump-snappy is available */
1707#ifdef CONFIG_SNAPPY
1708    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
1709    item = item->next;
1710    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY;
1711#endif
1712
1713    return cap;
1714}
1715