qemu/qga/commands-posix.c
<<
>>
Prefs
   1/*
   2 * QEMU Guest Agent POSIX-specific command implementations
   3 *
   4 * Copyright IBM Corp. 2011
   5 *
   6 * Authors:
   7 *  Michael Roth      <mdroth@linux.vnet.ibm.com>
   8 *  Michal Privoznik  <mprivozn@redhat.com>
   9 *
  10 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  11 * See the COPYING file in the top-level directory.
  12 */
  13
  14#include "qemu/osdep.h"
  15#include <sys/ioctl.h>
  16#include <sys/utsname.h>
  17#include <sys/wait.h>
  18#include <dirent.h>
  19#include "qga-qapi-commands.h"
  20#include "qapi/error.h"
  21#include "qapi/qmp/qerror.h"
  22#include "qemu/host-utils.h"
  23#include "qemu/sockets.h"
  24#include "qemu/base64.h"
  25#include "qemu/cutils.h"
  26#include "commands-common.h"
  27#include "block/nvme.h"
  28#include "cutils.h"
  29
  30#ifdef HAVE_UTMPX
  31#include <utmpx.h>
  32#endif
  33
  34#if defined(__linux__)
  35#include <mntent.h>
  36#include <sys/statvfs.h>
  37#include <linux/nvme_ioctl.h>
  38
  39#ifdef CONFIG_LIBUDEV
  40#include <libudev.h>
  41#endif
  42#endif
  43
  44#ifdef HAVE_GETIFADDRS
  45#include <arpa/inet.h>
  46#include <sys/socket.h>
  47#include <net/if.h>
  48#if defined(__NetBSD__) || defined(__OpenBSD__)
  49#include <net/if_arp.h>
  50#include <netinet/if_ether.h>
  51#else
  52#include <net/ethernet.h>
  53#endif
  54#ifdef CONFIG_SOLARIS
  55#include <sys/sockio.h>
  56#endif
  57#endif
  58
  59static void ga_wait_child(pid_t pid, int *status, Error **errp)
  60{
  61    pid_t rpid;
  62
  63    *status = 0;
  64
  65    rpid = RETRY_ON_EINTR(waitpid(pid, status, 0));
  66
  67    if (rpid == -1) {
  68        error_setg_errno(errp, errno, "failed to wait for child (pid: %d)",
  69                         pid);
  70        return;
  71    }
  72
  73    g_assert(rpid == pid);
  74}
  75
  76void qmp_guest_shutdown(const char *mode, Error **errp)
  77{
  78    const char *shutdown_flag;
  79    Error *local_err = NULL;
  80    pid_t pid;
  81    int status;
  82
  83#ifdef CONFIG_SOLARIS
  84    const char *powerdown_flag = "-i5";
  85    const char *halt_flag = "-i0";
  86    const char *reboot_flag = "-i6";
  87#elif defined(CONFIG_BSD)
  88    const char *powerdown_flag = "-p";
  89    const char *halt_flag = "-h";
  90    const char *reboot_flag = "-r";
  91#else
  92    const char *powerdown_flag = "-P";
  93    const char *halt_flag = "-H";
  94    const char *reboot_flag = "-r";
  95#endif
  96
  97    slog("guest-shutdown called, mode: %s", mode);
  98    if (!mode || strcmp(mode, "powerdown") == 0) {
  99        shutdown_flag = powerdown_flag;
 100    } else if (strcmp(mode, "halt") == 0) {
 101        shutdown_flag = halt_flag;
 102    } else if (strcmp(mode, "reboot") == 0) {
 103        shutdown_flag = reboot_flag;
 104    } else {
 105        error_setg(errp,
 106                   "mode is invalid (valid values are: halt|powerdown|reboot");
 107        return;
 108    }
 109
 110    pid = fork();
 111    if (pid == 0) {
 112        /* child, start the shutdown */
 113        setsid();
 114        reopen_fd_to_null(0);
 115        reopen_fd_to_null(1);
 116        reopen_fd_to_null(2);
 117
 118#ifdef CONFIG_SOLARIS
 119        execl("/sbin/shutdown", "shutdown", shutdown_flag, "-g0", "-y",
 120              "hypervisor initiated shutdown", (char *)NULL);
 121#elif defined(CONFIG_BSD)
 122        execl("/sbin/shutdown", "shutdown", shutdown_flag, "+0",
 123               "hypervisor initiated shutdown", (char *)NULL);
 124#else
 125        execl("/sbin/shutdown", "shutdown", "-h", shutdown_flag, "+0",
 126               "hypervisor initiated shutdown", (char *)NULL);
 127#endif
 128        _exit(EXIT_FAILURE);
 129    } else if (pid < 0) {
 130        error_setg_errno(errp, errno, "failed to create child process");
 131        return;
 132    }
 133
 134    ga_wait_child(pid, &status, &local_err);
 135    if (local_err) {
 136        error_propagate(errp, local_err);
 137        return;
 138    }
 139
 140    if (!WIFEXITED(status)) {
 141        error_setg(errp, "child process has terminated abnormally");
 142        return;
 143    }
 144
 145    if (WEXITSTATUS(status)) {
 146        error_setg(errp, "child process has failed to shutdown");
 147        return;
 148    }
 149
 150    /* succeeded */
 151}
 152
 153void qmp_guest_set_time(bool has_time, int64_t time_ns, Error **errp)
 154{
 155    int ret;
 156    int status;
 157    pid_t pid;
 158    Error *local_err = NULL;
 159    struct timeval tv;
 160    static const char hwclock_path[] = "/sbin/hwclock";
 161    static int hwclock_available = -1;
 162
 163    if (hwclock_available < 0) {
 164        hwclock_available = (access(hwclock_path, X_OK) == 0);
 165    }
 166
 167    if (!hwclock_available) {
 168        error_setg(errp, QERR_UNSUPPORTED);
 169        return;
 170    }
 171
 172    /* If user has passed a time, validate and set it. */
 173    if (has_time) {
 174        GDate date = { 0, };
 175
 176        /* year-2038 will overflow in case time_t is 32bit */
 177        if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
 178            error_setg(errp, "Time %" PRId64 " is too large", time_ns);
 179            return;
 180        }
 181
 182        tv.tv_sec = time_ns / 1000000000;
 183        tv.tv_usec = (time_ns % 1000000000) / 1000;
 184        g_date_set_time_t(&date, tv.tv_sec);
 185        if (date.year < 1970 || date.year >= 2070) {
 186            error_setg_errno(errp, errno, "Invalid time");
 187            return;
 188        }
 189
 190        ret = settimeofday(&tv, NULL);
 191        if (ret < 0) {
 192            error_setg_errno(errp, errno, "Failed to set time to guest");
 193            return;
 194        }
 195    }
 196
 197    /* Now, if user has passed a time to set and the system time is set, we
 198     * just need to synchronize the hardware clock. However, if no time was
 199     * passed, user is requesting the opposite: set the system time from the
 200     * hardware clock (RTC). */
 201    pid = fork();
 202    if (pid == 0) {
 203        setsid();
 204        reopen_fd_to_null(0);
 205        reopen_fd_to_null(1);
 206        reopen_fd_to_null(2);
 207
 208        /* Use '/sbin/hwclock -w' to set RTC from the system time,
 209         * or '/sbin/hwclock -s' to set the system time from RTC. */
 210        execl(hwclock_path, "hwclock", has_time ? "-w" : "-s", NULL);
 211        _exit(EXIT_FAILURE);
 212    } else if (pid < 0) {
 213        error_setg_errno(errp, errno, "failed to create child process");
 214        return;
 215    }
 216
 217    ga_wait_child(pid, &status, &local_err);
 218    if (local_err) {
 219        error_propagate(errp, local_err);
 220        return;
 221    }
 222
 223    if (!WIFEXITED(status)) {
 224        error_setg(errp, "child process has terminated abnormally");
 225        return;
 226    }
 227
 228    if (WEXITSTATUS(status)) {
 229        error_setg(errp, "hwclock failed to set hardware clock to system time");
 230        return;
 231    }
 232}
 233
  /*
   * Last kind of stdio operation performed on a guest file handle.
   *
   * The read path flushes the stream when the state is RW_STATE_WRITING and
   * the write path seeks in place when it is RW_STATE_READING; seek and
   * flush reset the state to RW_STATE_NEW.
   */
  234typedef enum {
  235    RW_STATE_NEW,
  236    RW_STATE_READING,
  237    RW_STATE_WRITING,
  238} RwState;
 239
  /*
   * One open guest file: the numeric handle (@id) used to look the entry
   * up, the stdio stream (@fh), the last operation kind (@state) and the
   * linkage (@next) into guest_file_state.filehandles.
   */
  240struct GuestFileHandle {
  241    uint64_t id;
  242    FILE *fh;
  243    RwState state;
  244    QTAILQ_ENTRY(GuestFileHandle) next;
  245};
 246
  /* File-scope list of all GuestFileHandle entries currently registered. */
  247static struct {
  248    QTAILQ_HEAD(, GuestFileHandle) filehandles;
  249} guest_file_state = {
  250    .filehandles = QTAILQ_HEAD_INITIALIZER(guest_file_state.filehandles),
  251};
 252
 253static int64_t guest_file_handle_add(FILE *fh, Error **errp)
 254{
 255    GuestFileHandle *gfh;
 256    int64_t handle;
 257
 258    handle = ga_get_fd_handle(ga_state, errp);
 259    if (handle < 0) {
 260        return -1;
 261    }
 262
 263    gfh = g_new0(GuestFileHandle, 1);
 264    gfh->id = handle;
 265    gfh->fh = fh;
 266    QTAILQ_INSERT_TAIL(&guest_file_state.filehandles, gfh, next);
 267
 268    return handle;
 269}
 270
 271GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
 272{
 273    GuestFileHandle *gfh;
 274
 275    QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next)
 276    {
 277        if (gfh->id == id) {
 278            return gfh;
 279        }
 280    }
 281
 282    error_setg(errp, "handle '%" PRId64 "' has not been found", id);
 283    return NULL;
 284}
 285
  /* Constant pointer to constant C string; element type of the tables below. */
  286typedef const char * const ccpc;
  287
  /* Fall back to 0 so O_BINARY can be OR-ed in where it is not defined. */
  288#ifndef O_BINARY
  289#define O_BINARY 0
  290#endif
  291
  /*
   * Table mapping fopen()-style mode strings to open() flags.  Each entry
   * lists the accepted spellings (NULL-terminated) and the base flags they
   * translate to; find_open_flag() searches it.
   */
  292/* http://pubs.opengroup.org/onlinepubs/9699919799/functions/fopen.html */
  293static const struct {
  294    ccpc *forms;
  295    int oflag_base;
  296} guest_file_open_modes[] = {
  297    { (ccpc[]){ "r",          NULL }, O_RDONLY                                 },
  298    { (ccpc[]){ "rb",         NULL }, O_RDONLY                      | O_BINARY },
  299    { (ccpc[]){ "w",          NULL }, O_WRONLY | O_CREAT | O_TRUNC             },
  300    { (ccpc[]){ "wb",         NULL }, O_WRONLY | O_CREAT | O_TRUNC  | O_BINARY },
  301    { (ccpc[]){ "a",          NULL }, O_WRONLY | O_CREAT | O_APPEND            },
  302    { (ccpc[]){ "ab",         NULL }, O_WRONLY | O_CREAT | O_APPEND | O_BINARY },
  303    { (ccpc[]){ "r+",         NULL }, O_RDWR                                   },
  304    { (ccpc[]){ "rb+", "r+b", NULL }, O_RDWR                        | O_BINARY },
  305    { (ccpc[]){ "w+",         NULL }, O_RDWR   | O_CREAT | O_TRUNC             },
  306    { (ccpc[]){ "wb+", "w+b", NULL }, O_RDWR   | O_CREAT | O_TRUNC  | O_BINARY },
  307    { (ccpc[]){ "a+",         NULL }, O_RDWR   | O_CREAT | O_APPEND            },
  308    { (ccpc[]){ "ab+", "a+b", NULL }, O_RDWR   | O_CREAT | O_APPEND | O_BINARY }
  309};
 310
 311static int
 312find_open_flag(const char *mode_str, Error **errp)
 313{
 314    unsigned mode;
 315
 316    for (mode = 0; mode < ARRAY_SIZE(guest_file_open_modes); ++mode) {
 317        ccpc *form;
 318
 319        form = guest_file_open_modes[mode].forms;
 320        while (*form != NULL && strcmp(*form, mode_str) != 0) {
 321            ++form;
 322        }
 323        if (*form != NULL) {
 324            break;
 325        }
 326    }
 327
 328    if (mode == ARRAY_SIZE(guest_file_open_modes)) {
 329        error_setg(errp, "invalid file open mode '%s'", mode_str);
 330        return -1;
 331    }
 332    return guest_file_open_modes[mode].oflag_base | O_NOCTTY | O_NONBLOCK;
 333}
 334
  /*
   * Permission bits applied with fchmod() to files newly created by
   * safe_open_or_create(): read and write for user, group and others.
   */
  335#define DEFAULT_NEW_FILE_MODE (S_IRUSR | S_IWUSR | \
  336                               S_IRGRP | S_IWGRP | \
  337                               S_IROTH | S_IWOTH)
 338
 339static FILE *
 340safe_open_or_create(const char *path, const char *mode, Error **errp)
 341{
 342    int oflag;
 343    int fd = -1;
 344    FILE *f = NULL;
 345
 346    oflag = find_open_flag(mode, errp);
 347    if (oflag < 0) {
 348        goto end;
 349    }
 350
 351    /* If the caller wants / allows creation of a new file, we implement it
 352     * with a two step process: open() + (open() / fchmod()).
 353     *
 354     * First we insist on creating the file exclusively as a new file. If
 355     * that succeeds, we're free to set any file-mode bits on it. (The
 356     * motivation is that we want to set those file-mode bits independently
 357     * of the current umask.)
 358     *
 359     * If the exclusive creation fails because the file already exists
 360     * (EEXIST is not possible for any other reason), we just attempt to
 361     * open the file, but in this case we won't be allowed to change the
 362     * file-mode bits on the preexistent file.
 363     *
 364     * The pathname should never disappear between the two open()s in
 365     * practice. If it happens, then someone very likely tried to race us.
 366     * In this case just go ahead and report the ENOENT from the second
 367     * open() to the caller.
 368     *
 369     * If the caller wants to open a preexistent file, then the first
 370     * open() is decisive and its third argument is ignored, and the second
 371     * open() and the fchmod() are never called.
 372     */
 373    fd = qga_open_cloexec(path, oflag | ((oflag & O_CREAT) ? O_EXCL : 0), 0);
 374    if (fd == -1 && errno == EEXIST) {
 375        oflag &= ~(unsigned)O_CREAT;
 376        fd = qga_open_cloexec(path, oflag, 0);
 377    }
 378    if (fd == -1) {
 379        error_setg_errno(errp, errno,
 380                         "failed to open file '%s' (mode: '%s')",
 381                         path, mode);
 382        goto end;
 383    }
 384
 385    if ((oflag & O_CREAT) && fchmod(fd, DEFAULT_NEW_FILE_MODE) == -1) {
 386        error_setg_errno(errp, errno, "failed to set permission "
 387                         "0%03o on new file '%s' (mode: '%s')",
 388                         (unsigned)DEFAULT_NEW_FILE_MODE, path, mode);
 389        goto end;
 390    }
 391
 392    f = fdopen(fd, mode);
 393    if (f == NULL) {
 394        error_setg_errno(errp, errno, "failed to associate stdio stream with "
 395                         "file descriptor %d, file '%s' (mode: '%s')",
 396                         fd, path, mode);
 397    }
 398
 399end:
 400    if (f == NULL && fd != -1) {
 401        close(fd);
 402        if (oflag & O_CREAT) {
 403            unlink(path);
 404        }
 405    }
 406    return f;
 407}
 408
 409int64_t qmp_guest_file_open(const char *path, const char *mode,
 410                            Error **errp)
 411{
 412    FILE *fh;
 413    Error *local_err = NULL;
 414    int64_t handle;
 415
 416    if (!mode) {
 417        mode = "r";
 418    }
 419    slog("guest-file-open called, filepath: %s, mode: %s", path, mode);
 420    fh = safe_open_or_create(path, mode, &local_err);
 421    if (local_err != NULL) {
 422        error_propagate(errp, local_err);
 423        return -1;
 424    }
 425
 426    /* set fd non-blocking to avoid common use cases (like reading from a
 427     * named pipe) from hanging the agent
 428     */
 429    if (!g_unix_set_fd_nonblocking(fileno(fh), true, NULL)) {
 430        fclose(fh);
 431        error_setg_errno(errp, errno, "Failed to set FD nonblocking");
 432        return -1;
 433    }
 434
 435    handle = guest_file_handle_add(fh, errp);
 436    if (handle < 0) {
 437        fclose(fh);
 438        return -1;
 439    }
 440
 441    slog("guest-file-open, handle: %" PRId64, handle);
 442    return handle;
 443}
 444
 445void qmp_guest_file_close(int64_t handle, Error **errp)
 446{
 447    GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
 448    int ret;
 449
 450    slog("guest-file-close called, handle: %" PRId64, handle);
 451    if (!gfh) {
 452        return;
 453    }
 454
 455    ret = fclose(gfh->fh);
 456    if (ret == EOF) {
 457        error_setg_errno(errp, errno, "failed to close handle");
 458        return;
 459    }
 460
 461    QTAILQ_REMOVE(&guest_file_state.filehandles, gfh, next);
 462    g_free(gfh);
 463}
 464
 465GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
 466                                      int64_t count, Error **errp)
 467{
 468    GuestFileRead *read_data = NULL;
 469    guchar *buf;
 470    FILE *fh = gfh->fh;
 471    size_t read_count;
 472
 473    /* explicitly flush when switching from writing to reading */
 474    if (gfh->state == RW_STATE_WRITING) {
 475        int ret = fflush(fh);
 476        if (ret == EOF) {
 477            error_setg_errno(errp, errno, "failed to flush file");
 478            return NULL;
 479        }
 480        gfh->state = RW_STATE_NEW;
 481    }
 482
 483    buf = g_malloc0(count + 1);
 484    read_count = fread(buf, 1, count, fh);
 485    if (ferror(fh)) {
 486        error_setg_errno(errp, errno, "failed to read file");
 487    } else {
 488        buf[read_count] = 0;
 489        read_data = g_new0(GuestFileRead, 1);
 490        read_data->count = read_count;
 491        read_data->eof = feof(fh);
 492        if (read_count) {
 493            read_data->buf_b64 = g_base64_encode(buf, read_count);
 494        }
 495        gfh->state = RW_STATE_READING;
 496    }
 497    g_free(buf);
 498    clearerr(fh);
 499
 500    return read_data;
 501}
 502
 503GuestFileWrite *qmp_guest_file_write(int64_t handle, const char *buf_b64,
 504                                     bool has_count, int64_t count,
 505                                     Error **errp)
 506{
 507    GuestFileWrite *write_data = NULL;
 508    guchar *buf;
 509    gsize buf_len;
 510    int write_count;
 511    GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
 512    FILE *fh;
 513
 514    if (!gfh) {
 515        return NULL;
 516    }
 517
 518    fh = gfh->fh;
 519
 520    if (gfh->state == RW_STATE_READING) {
 521        int ret = fseek(fh, 0, SEEK_CUR);
 522        if (ret == -1) {
 523            error_setg_errno(errp, errno, "failed to seek file");
 524            return NULL;
 525        }
 526        gfh->state = RW_STATE_NEW;
 527    }
 528
 529    buf = qbase64_decode(buf_b64, -1, &buf_len, errp);
 530    if (!buf) {
 531        return NULL;
 532    }
 533
 534    if (!has_count) {
 535        count = buf_len;
 536    } else if (count < 0 || count > buf_len) {
 537        error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
 538                   count);
 539        g_free(buf);
 540        return NULL;
 541    }
 542
 543    write_count = fwrite(buf, 1, count, fh);
 544    if (ferror(fh)) {
 545        error_setg_errno(errp, errno, "failed to write to file");
 546        slog("guest-file-write failed, handle: %" PRId64, handle);
 547    } else {
 548        write_data = g_new0(GuestFileWrite, 1);
 549        write_data->count = write_count;
 550        write_data->eof = feof(fh);
 551        gfh->state = RW_STATE_WRITING;
 552    }
 553    g_free(buf);
 554    clearerr(fh);
 555
 556    return write_data;
 557}
 558
 559struct GuestFileSeek *qmp_guest_file_seek(int64_t handle, int64_t offset,
 560                                          GuestFileWhence *whence_code,
 561                                          Error **errp)
 562{
 563    GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
 564    GuestFileSeek *seek_data = NULL;
 565    FILE *fh;
 566    int ret;
 567    int whence;
 568    Error *err = NULL;
 569
 570    if (!gfh) {
 571        return NULL;
 572    }
 573
 574    /* We stupidly exposed 'whence':'int' in our qapi */
 575    whence = ga_parse_whence(whence_code, &err);
 576    if (err) {
 577        error_propagate(errp, err);
 578        return NULL;
 579    }
 580
 581    fh = gfh->fh;
 582    ret = fseek(fh, offset, whence);
 583    if (ret == -1) {
 584        error_setg_errno(errp, errno, "failed to seek file");
 585        if (errno == ESPIPE) {
 586            /* file is non-seekable, stdio shouldn't be buffering anyways */
 587            gfh->state = RW_STATE_NEW;
 588        }
 589    } else {
 590        seek_data = g_new0(GuestFileSeek, 1);
 591        seek_data->position = ftell(fh);
 592        seek_data->eof = feof(fh);
 593        gfh->state = RW_STATE_NEW;
 594    }
 595    clearerr(fh);
 596
 597    return seek_data;
 598}
 599
 600void qmp_guest_file_flush(int64_t handle, Error **errp)
 601{
 602    GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
 603    FILE *fh;
 604    int ret;
 605
 606    if (!gfh) {
 607        return;
 608    }
 609
 610    fh = gfh->fh;
 611    ret = fflush(fh);
 612    if (ret == EOF) {
 613        error_setg_errno(errp, errno, "failed to flush file");
 614    } else {
 615        gfh->state = RW_STATE_NEW;
 616    }
 617}
 618
 619#if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
 620void free_fs_mount_list(FsMountList *mounts)
 621{
 622     FsMount *mount, *temp;
 623
 624     if (!mounts) {
 625         return;
 626     }
 627
 628     QTAILQ_FOREACH_SAFE(mount, mounts, next, temp) {
 629         QTAILQ_REMOVE(mounts, mount, next);
 630         g_free(mount->dirname);
 631         g_free(mount->devtype);
 632         g_free(mount);
 633     }
 634}
 635#endif
 636
 637#if defined(CONFIG_FSFREEZE)
  /* Which phase the fsfreeze hook is run for; indexes the table below. */
  638typedef enum {
  639    FSFREEZE_HOOK_THAW = 0,
  640    FSFREEZE_HOOK_FREEZE,
  641} FsfreezeHookArg;
  642
  /* Argument string passed to the hook, indexed by FsfreezeHookArg. */
  643static const char *fsfreeze_hook_arg_string[] = {
  644    "thaw",
  645    "freeze",
  646};
 647
 648static void execute_fsfreeze_hook(FsfreezeHookArg arg, Error **errp)
 649{
 650    int status;
 651    pid_t pid;
 652    const char *hook;
 653    const char *arg_str = fsfreeze_hook_arg_string[arg];
 654    Error *local_err = NULL;
 655
 656    hook = ga_fsfreeze_hook(ga_state);
 657    if (!hook) {
 658        return;
 659    }
 660    if (access(hook, X_OK) != 0) {
 661        error_setg_errno(errp, errno, "can't access fsfreeze hook '%s'", hook);
 662        return;
 663    }
 664
 665    slog("executing fsfreeze hook with arg '%s'", arg_str);
 666    pid = fork();
 667    if (pid == 0) {
 668        setsid();
 669        reopen_fd_to_null(0);
 670        reopen_fd_to_null(1);
 671        reopen_fd_to_null(2);
 672
 673        execl(hook, hook, arg_str, NULL);
 674        _exit(EXIT_FAILURE);
 675    } else if (pid < 0) {
 676        error_setg_errno(errp, errno, "failed to create child process");
 677        return;
 678    }
 679
 680    ga_wait_child(pid, &status, &local_err);
 681    if (local_err) {
 682        error_propagate(errp, local_err);
 683        return;
 684    }
 685
 686    if (!WIFEXITED(status)) {
 687        error_setg(errp, "fsfreeze hook has terminated abnormally");
 688        return;
 689    }
 690
 691    status = WEXITSTATUS(status);
 692    if (status) {
 693        error_setg(errp, "fsfreeze hook has failed with status %d", status);
 694        return;
 695    }
 696}
 697
 698/*
 699 * Return status of freeze/thaw
 700 */
 701GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
 702{
 703    if (ga_is_frozen(ga_state)) {
 704        return GUEST_FSFREEZE_STATUS_FROZEN;
 705    }
 706
 707    return GUEST_FSFREEZE_STATUS_THAWED;
 708}
 709
  /*
   * Freeze without a mountpoint list: forwards to
   * qmp_guest_fsfreeze_freeze_list() with has_mountpoints=false and
   * returns its result.
   */
  710int64_t qmp_guest_fsfreeze_freeze(Error **errp)
  711{
  712    return qmp_guest_fsfreeze_freeze_list(false, NULL, errp);
  713}
 714
 715int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
 716                                       strList *mountpoints,
 717                                       Error **errp)
 718{
 719    int ret;
 720    FsMountList mounts;
 721    Error *local_err = NULL;
 722
 723    slog("guest-fsfreeze called");
 724
 725    execute_fsfreeze_hook(FSFREEZE_HOOK_FREEZE, &local_err);
 726    if (local_err) {
 727        error_propagate(errp, local_err);
 728        return -1;
 729    }
 730
 731    QTAILQ_INIT(&mounts);
 732    if (!build_fs_mount_list(&mounts, &local_err)) {
 733        error_propagate(errp, local_err);
 734        return -1;
 735    }
 736
 737    /* cannot risk guest agent blocking itself on a write in this state */
 738    ga_set_frozen(ga_state);
 739
 740    ret = qmp_guest_fsfreeze_do_freeze_list(has_mountpoints, mountpoints,
 741                                            mounts, errp);
 742
 743    free_fs_mount_list(&mounts);
 744    /* We may not issue any FIFREEZE here.
 745     * Just unset ga_state here and ready for the next call.
 746     */
 747    if (ret == 0) {
 748        ga_unset_frozen(ga_state);
 749    } else if (ret < 0) {
 750        qmp_guest_fsfreeze_thaw(NULL);
 751    }
 752    return ret;
 753}
 754
 755int64_t qmp_guest_fsfreeze_thaw(Error **errp)
 756{
 757    int ret;
 758
 759    ret = qmp_guest_fsfreeze_do_thaw(errp);
 760    if (ret >= 0) {
 761        ga_unset_frozen(ga_state);
 762        execute_fsfreeze_hook(FSFREEZE_HOOK_THAW, errp);
 763    } else {
 764        ret = 0;
 765    }
 766
 767    return ret;
 768}
 769
 770static void guest_fsfreeze_cleanup(void)
 771{
 772    Error *err = NULL;
 773
 774    if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
 775        qmp_guest_fsfreeze_thaw(&err);
 776        if (err) {
 777            slog("failed to clean up frozen filesystems: %s",
 778                 error_get_pretty(err));
 779            error_free(err);
 780        }
 781    }
 782}
 783#endif
 784
 785/* linux-specific implementations. avoid this if at all possible. */
 786#if defined(__linux__)
 787#if defined(CONFIG_FSFREEZE)
 788
 789static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
 790{
 791    char *path;
 792    char *dpath;
 793    char *driver = NULL;
 794    char buf[PATH_MAX];
 795    ssize_t len;
 796
 797    path = g_strndup(syspath, pathlen);
 798    dpath = g_strdup_printf("%s/driver", path);
 799    len = readlink(dpath, buf, sizeof(buf) - 1);
 800    if (len != -1) {
 801        buf[len] = 0;
 802        driver = g_path_get_basename(buf);
 803    }
 804    g_free(dpath);
 805    g_free(path);
 806    return driver;
 807}
 808
 809static int compare_uint(const void *_a, const void *_b)
 810{
 811    unsigned int a = *(unsigned int *)_a;
 812    unsigned int b = *(unsigned int *)_b;
 813
 814    return a < b ? -1 : a > b ? 1 : 0;
 815}
 816
 817/* Walk the specified sysfs and build a sorted list of host or ata numbers */
 818static int build_hosts(char const *syspath, char const *host, bool ata,
 819                       unsigned int *hosts, int hosts_max, Error **errp)
 820{
 821    char *path;
 822    DIR *dir;
 823    struct dirent *entry;
 824    int i = 0;
 825
 826    path = g_strndup(syspath, host - syspath);
 827    dir = opendir(path);
 828    if (!dir) {
 829        error_setg_errno(errp, errno, "opendir(\"%s\")", path);
 830        g_free(path);
 831        return -1;
 832    }
 833
 834    while (i < hosts_max) {
 835        entry = readdir(dir);
 836        if (!entry) {
 837            break;
 838        }
 839        if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
 840            ++i;
 841        } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
 842            ++i;
 843        }
 844    }
 845
 846    qsort(hosts, i, sizeof(hosts[0]), compare_uint);
 847
 848    g_free(path);
 849    closedir(dir);
 850    return i;
 851}
 852
 853/*
 854 * Store disk device info for devices on the PCI bus.
 855 * Returns true if information has been stored, or false for failure.
 856 */
 857static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
 858                                           GuestDiskAddress *disk,
 859                                           Error **errp)
 860{
 861    unsigned int pci[4], host, hosts[8], tgt[3];
 862    int i, nhosts = 0, pcilen;
 863    GuestPCIAddress *pciaddr = disk->pci_controller;
 864    bool has_ata = false, has_host = false, has_tgt = false;
 865    char *p, *q, *driver = NULL;
 866    bool ret = false;
 867
 868    p = strstr(syspath, "/devices/pci");
 869    if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
 870                     pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
 871        g_debug("only pci device is supported: sysfs path '%s'", syspath);
 872        return false;
 873    }
 874
 875    p += 12 + pcilen;
 876    while (true) {
 877        driver = get_pci_driver(syspath, p - syspath, errp);
 878        if (driver && (g_str_equal(driver, "ata_piix") ||
 879                       g_str_equal(driver, "sym53c8xx") ||
 880                       g_str_equal(driver, "virtio-pci") ||
 881                       g_str_equal(driver, "ahci") ||
 882                       g_str_equal(driver, "nvme") ||
 883                       g_str_equal(driver, "xhci_hcd") ||
 884                       g_str_equal(driver, "ehci-pci"))) {
 885            break;
 886        }
 887
 888        g_free(driver);
 889        if (sscanf(p, "/%x:%x:%x.%x%n",
 890                          pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
 891            p += pcilen;
 892            continue;
 893        }
 894
 895        g_debug("unsupported driver or sysfs path '%s'", syspath);
 896        return false;
 897    }
 898
 899    p = strstr(syspath, "/target");
 900    if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
 901                    tgt, tgt + 1, tgt + 2) == 3) {
 902        has_tgt = true;
 903    }
 904
 905    p = strstr(syspath, "/ata");
 906    if (p) {
 907        q = p + 4;
 908        has_ata = true;
 909    } else {
 910        p = strstr(syspath, "/host");
 911        q = p + 5;
 912    }
 913    if (p && sscanf(q, "%u", &host) == 1) {
 914        has_host = true;
 915        nhosts = build_hosts(syspath, p, has_ata, hosts,
 916                             ARRAY_SIZE(hosts), errp);
 917        if (nhosts < 0) {
 918            goto cleanup;
 919        }
 920    }
 921
 922    pciaddr->domain = pci[0];
 923    pciaddr->bus = pci[1];
 924    pciaddr->slot = pci[2];
 925    pciaddr->function = pci[3];
 926
 927    if (strcmp(driver, "ata_piix") == 0) {
 928        /* a host per ide bus, target*:0:<unit>:0 */
 929        if (!has_host || !has_tgt) {
 930            g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
 931            goto cleanup;
 932        }
 933        for (i = 0; i < nhosts; i++) {
 934            if (host == hosts[i]) {
 935                disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
 936                disk->bus = i;
 937                disk->unit = tgt[1];
 938                break;
 939            }
 940        }
 941        if (i >= nhosts) {
 942            g_debug("no host for '%s' (driver '%s')", syspath, driver);
 943            goto cleanup;
 944        }
 945    } else if (strcmp(driver, "sym53c8xx") == 0) {
 946        /* scsi(LSI Logic): target*:0:<unit>:0 */
 947        if (!has_tgt) {
 948            g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
 949            goto cleanup;
 950        }
 951        disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
 952        disk->unit = tgt[1];
 953    } else if (strcmp(driver, "virtio-pci") == 0) {
 954        if (has_tgt) {
 955            /* virtio-scsi: target*:0:0:<unit> */
 956            disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
 957            disk->unit = tgt[2];
 958        } else {
 959            /* virtio-blk: 1 disk per 1 device */
 960            disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
 961        }
 962    } else if (strcmp(driver, "ahci") == 0) {
 963        /* ahci: 1 host per 1 unit */
 964        if (!has_host || !has_tgt) {
 965            g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
 966            goto cleanup;
 967        }
 968        for (i = 0; i < nhosts; i++) {
 969            if (host == hosts[i]) {
 970                disk->unit = i;
 971                disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
 972                break;
 973            }
 974        }
 975        if (i >= nhosts) {
 976            g_debug("no host for '%s' (driver '%s')", syspath, driver);
 977            goto cleanup;
 978        }
 979    } else if (strcmp(driver, "nvme") == 0) {
 980        disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
 981    } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
 982        disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
 983    } else {
 984        g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
 985        goto cleanup;
 986    }
 987
 988    ret = true;
 989
 990cleanup:
 991    g_free(driver);
 992    return ret;
 993}
 994
 995/*
 996 * Store disk device info for non-PCI virtio devices (for example s390x
 997 * channel I/O devices). Returns true if information has been stored, or
 998 * false for failure.
 999 */
1000static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
1001                                                 GuestDiskAddress *disk,
1002                                                 Error **errp)
1003{
1004    unsigned int tgt[3];
1005    char *p;
1006
1007    if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
1008        g_debug("Unsupported virtio device '%s'", syspath);
1009        return false;
1010    }
1011
1012    p = strstr(syspath, "/target");
1013    if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
1014                    &tgt[0], &tgt[1], &tgt[2]) == 3) {
1015        /* virtio-scsi: target*:0:<target>:<unit> */
1016        disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
1017        disk->bus = tgt[0];
1018        disk->target = tgt[1];
1019        disk->unit = tgt[2];
1020    } else {
1021        /* virtio-blk: 1 disk per 1 device */
1022        disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
1023    }
1024
1025    return true;
1026}
1027
1028/*
1029 * Store disk device info for CCW devices (s390x channel I/O devices).
1030 * Returns true if information has been stored, or false for failure.
1031 */
1032static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
1033                                           GuestDiskAddress *disk,
1034                                           Error **errp)
1035{
1036    unsigned int cssid, ssid, subchno, devno;
1037    char *p;
1038
1039    p = strstr(syspath, "/devices/css");
1040    if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
1041                     &cssid, &ssid, &subchno, &devno) < 4) {
1042        g_debug("could not parse ccw device sysfs path: %s", syspath);
1043        return false;
1044    }
1045
1046    disk->ccw_address = g_new0(GuestCCWAddress, 1);
1047    disk->ccw_address->cssid = cssid;
1048    disk->ccw_address->ssid = ssid;
1049    disk->ccw_address->subchno = subchno;
1050    disk->ccw_address->devno = devno;
1051
1052    if (strstr(p, "/virtio")) {
1053        build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1054    }
1055
1056    return true;
1057}
1058
1059/* Store disk device info specified by @sysfs into @fs */
1060static void build_guest_fsinfo_for_real_device(char const *syspath,
1061                                               GuestFilesystemInfo *fs,
1062                                               Error **errp)
1063{
1064    GuestDiskAddress *disk;
1065    GuestPCIAddress *pciaddr;
1066    bool has_hwinf;
1067#ifdef CONFIG_LIBUDEV
1068    struct udev *udev = NULL;
1069    struct udev_device *udevice = NULL;
1070#endif
1071
1072    pciaddr = g_new0(GuestPCIAddress, 1);
1073    pciaddr->domain = -1;                       /* -1 means field is invalid */
1074    pciaddr->bus = -1;
1075    pciaddr->slot = -1;
1076    pciaddr->function = -1;
1077
1078    disk = g_new0(GuestDiskAddress, 1);
1079    disk->pci_controller = pciaddr;
1080    disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
1081
1082#ifdef CONFIG_LIBUDEV
1083    udev = udev_new();
1084    udevice = udev_device_new_from_syspath(udev, syspath);
1085    if (udev == NULL || udevice == NULL) {
1086        g_debug("failed to query udev");
1087    } else {
1088        const char *devnode, *serial;
1089        devnode = udev_device_get_devnode(udevice);
1090        if (devnode != NULL) {
1091            disk->dev = g_strdup(devnode);
1092        }
1093        serial = udev_device_get_property_value(udevice, "ID_SERIAL");
1094        if (serial != NULL && *serial != 0) {
1095            disk->serial = g_strdup(serial);
1096        }
1097    }
1098
1099    udev_unref(udev);
1100    udev_device_unref(udevice);
1101#endif
1102
1103    if (strstr(syspath, "/devices/pci")) {
1104        has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
1105    } else if (strstr(syspath, "/devices/css")) {
1106        has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
1107    } else if (strstr(syspath, "/virtio")) {
1108        has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
1109    } else {
1110        g_debug("Unsupported device type for '%s'", syspath);
1111        has_hwinf = false;
1112    }
1113
1114    if (has_hwinf || disk->dev || disk->serial) {
1115        QAPI_LIST_PREPEND(fs->disk, disk);
1116    } else {
1117        qapi_free_GuestDiskAddress(disk);
1118    }
1119}
1120
1121static void build_guest_fsinfo_for_device(char const *devpath,
1122                                          GuestFilesystemInfo *fs,
1123                                          Error **errp);
1124
1125/* Store a list of slave devices of virtual volume specified by @syspath into
1126 * @fs */
1127static void build_guest_fsinfo_for_virtual_device(char const *syspath,
1128                                                  GuestFilesystemInfo *fs,
1129                                                  Error **errp)
1130{
1131    Error *err = NULL;
1132    DIR *dir;
1133    char *dirpath;
1134    struct dirent *entry;
1135
1136    dirpath = g_strdup_printf("%s/slaves", syspath);
1137    dir = opendir(dirpath);
1138    if (!dir) {
1139        if (errno != ENOENT) {
1140            error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
1141        }
1142        g_free(dirpath);
1143        return;
1144    }
1145
1146    for (;;) {
1147        errno = 0;
1148        entry = readdir(dir);
1149        if (entry == NULL) {
1150            if (errno) {
1151                error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
1152            }
1153            break;
1154        }
1155
1156        if (entry->d_type == DT_LNK) {
1157            char *path;
1158
1159            g_debug(" slave device '%s'", entry->d_name);
1160            path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
1161            build_guest_fsinfo_for_device(path, fs, &err);
1162            g_free(path);
1163
1164            if (err) {
1165                error_propagate(errp, err);
1166                break;
1167            }
1168        }
1169    }
1170
1171    g_free(dirpath);
1172    closedir(dir);
1173}
1174
1175static bool is_disk_virtual(const char *devpath, Error **errp)
1176{
1177    g_autofree char *syspath = realpath(devpath, NULL);
1178
1179    if (!syspath) {
1180        error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1181        return false;
1182    }
1183    return strstr(syspath, "/devices/virtual/block/") != NULL;
1184}
1185
1186/* Dispatch to functions for virtual/real device */
1187static void build_guest_fsinfo_for_device(char const *devpath,
1188                                          GuestFilesystemInfo *fs,
1189                                          Error **errp)
1190{
1191    ERRP_GUARD();
1192    g_autofree char *syspath = NULL;
1193    bool is_virtual = false;
1194
1195    syspath = realpath(devpath, NULL);
1196    if (!syspath) {
1197        if (errno != ENOENT) {
1198            error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
1199            return;
1200        }
1201
1202        /* ENOENT: This devpath may not exist because of container config */
1203        if (!fs->name) {
1204            fs->name = g_path_get_basename(devpath);
1205        }
1206        return;
1207    }
1208
1209    if (!fs->name) {
1210        fs->name = g_path_get_basename(syspath);
1211    }
1212
1213    g_debug("  parse sysfs path '%s'", syspath);
1214    is_virtual = is_disk_virtual(syspath, errp);
1215    if (*errp != NULL) {
1216        return;
1217    }
1218    if (is_virtual) {
1219        build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
1220    } else {
1221        build_guest_fsinfo_for_real_device(syspath, fs, errp);
1222    }
1223}
1224
1225#ifdef CONFIG_LIBUDEV
1226
1227/*
1228 * Wrapper around build_guest_fsinfo_for_device() for getting just
1229 * the disk address.
1230 */
1231static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
1232{
1233    g_autoptr(GuestFilesystemInfo) fs = NULL;
1234
1235    fs = g_new0(GuestFilesystemInfo, 1);
1236    build_guest_fsinfo_for_device(syspath, fs, errp);
1237    if (fs->disk != NULL) {
1238        return g_steal_pointer(&fs->disk->value);
1239    }
1240    return NULL;
1241}
1242
/*
 * Look up the udev "DM_NAME" property of the device at @syspath.
 *
 * Returns a newly allocated copy of the alias, or NULL if udev could not
 * be queried, the property is missing, or the property is empty.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev_device *udevice = NULL;
    struct udev *udev;
    char *alias_copy = NULL;

    udev = udev_new();
    if (udev != NULL) {
        udevice = udev_device_new_from_syspath(udev, syspath);
    }

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else if (udevice == NULL) {
        g_debug("failed to query udev for path: %s", syspath);
    } else {
        const char *alias = udev_device_get_property_value(
            udevice, "DM_NAME");

        /*
         * NULL means there was an error and empty string means there is no
         * alias. In case of no alias we return NULL instead of empty string.
         */
        if (alias == NULL) {
            g_debug("failed to query udev for device alias for: %s",
                syspath);
        } else if (*alias != 0) {
            alias_copy = g_strdup(alias);
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return alias_copy;
}
1278
/*
 * Ask udev for the device node of the device at @syspath.
 *
 * Returns a newly allocated copy of the device node as produced by
 * g_strdup(), or NULL if udev could not be queried.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev_device *udevice = NULL;
    struct udev *udev;
    char *devnode = NULL;

    udev = udev_new();
    if (udev != NULL) {
        udevice = udev_device_new_from_syspath(udev, syspath);
    }

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else if (udevice == NULL) {
        g_debug("failed to query udev for path: %s", syspath);
    } else {
        devnode = g_strdup(udev_device_get_devnode(udevice));
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return devnode;
}
1303
1304static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
1305{
1306    g_autofree char *deps_dir = NULL;
1307    const gchar *dep;
1308    GDir *dp_deps = NULL;
1309
1310    /* List dependent disks */
1311    deps_dir = g_strdup_printf("%s/slaves", disk_dir);
1312    g_debug("  listing entries in: %s", deps_dir);
1313    dp_deps = g_dir_open(deps_dir, 0, NULL);
1314    if (dp_deps == NULL) {
1315        g_debug("failed to list entries in %s", deps_dir);
1316        return;
1317    }
1318    disk->has_dependencies = true;
1319    while ((dep = g_dir_read_name(dp_deps)) != NULL) {
1320        g_autofree char *dep_dir = NULL;
1321        char *dev_name;
1322
1323        /* Add dependent disks */
1324        dep_dir = g_strdup_printf("%s/%s", deps_dir, dep);
1325        dev_name = get_device_for_syspath(dep_dir);
1326        if (dev_name != NULL) {
1327            g_debug("  adding dependent device: %s", dev_name);
1328            QAPI_LIST_PREPEND(disk->dependencies, dev_name);
1329        }
1330    }
1331    g_dir_close(dp_deps);
1332}
1333
1334/*
1335 * Detect partitions subdirectory, name is "<disk_name><number>" or
1336 * "<disk_name>p<number>"
1337 *
1338 * @disk_name -- last component of /sys path (e.g. sda)
1339 * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
1340 * @disk_dev -- device node of the disk (e.g. /dev/sda)
1341 */
1342static GuestDiskInfoList *get_disk_partitions(
1343    GuestDiskInfoList *list,
1344    const char *disk_name, const char *disk_dir,
1345    const char *disk_dev)
1346{
1347    GuestDiskInfoList *ret = list;
1348    struct dirent *de_disk;
1349    DIR *dp_disk = NULL;
1350    size_t len = strlen(disk_name);
1351
1352    dp_disk = opendir(disk_dir);
1353    while ((de_disk = readdir(dp_disk)) != NULL) {
1354        g_autofree char *partition_dir = NULL;
1355        char *dev_name;
1356        GuestDiskInfo *partition;
1357
1358        if (!(de_disk->d_type & DT_DIR)) {
1359            continue;
1360        }
1361
1362        if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
1363            ((*(de_disk->d_name + len) == 'p' &&
1364            isdigit(*(de_disk->d_name + len + 1))) ||
1365                isdigit(*(de_disk->d_name + len))))) {
1366            continue;
1367        }
1368
1369        partition_dir = g_strdup_printf("%s/%s",
1370            disk_dir, de_disk->d_name);
1371        dev_name = get_device_for_syspath(partition_dir);
1372        if (dev_name == NULL) {
1373            g_debug("Failed to get device name for syspath: %s",
1374                disk_dir);
1375            continue;
1376        }
1377        partition = g_new0(GuestDiskInfo, 1);
1378        partition->name = dev_name;
1379        partition->partition = true;
1380        partition->has_dependencies = true;
1381        /* Add parent disk as dependent for easier tracking of hierarchy */
1382        QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
1383
1384        QAPI_LIST_PREPEND(ret, partition);
1385    }
1386    closedir(dp_disk);
1387
1388    return ret;
1389}
1390
1391static void get_nvme_smart(GuestDiskInfo *disk)
1392{
1393    int fd;
1394    GuestNVMeSmart *smart;
1395    NvmeSmartLog log = {0};
1396    struct nvme_admin_cmd cmd = {
1397        .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
1398        .nsid = NVME_NSID_BROADCAST,
1399        .addr = (uintptr_t)&log,
1400        .data_len = sizeof(log),
1401        .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
1402                 | (((sizeof(log) >> 2) - 1) << 16)
1403    };
1404
1405    fd = qga_open_cloexec(disk->name, O_RDONLY, 0);
1406    if (fd == -1) {
1407        g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
1408        return;
1409    }
1410
1411    if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd)) {
1412        g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
1413        close(fd);
1414        return;
1415    }
1416
1417    disk->smart = g_new0(GuestDiskSmart, 1);
1418    disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
1419
1420    smart = &disk->smart->u.nvme;
1421    smart->critical_warning = log.critical_warning;
1422    smart->temperature = lduw_le_p(&log.temperature); /* unaligned field */
1423    smart->available_spare = log.available_spare;
1424    smart->available_spare_threshold = log.available_spare_threshold;
1425    smart->percentage_used = log.percentage_used;
1426    smart->data_units_read_lo = le64_to_cpu(log.data_units_read[0]);
1427    smart->data_units_read_hi = le64_to_cpu(log.data_units_read[1]);
1428    smart->data_units_written_lo = le64_to_cpu(log.data_units_written[0]);
1429    smart->data_units_written_hi = le64_to_cpu(log.data_units_written[1]);
1430    smart->host_read_commands_lo = le64_to_cpu(log.host_read_commands[0]);
1431    smart->host_read_commands_hi = le64_to_cpu(log.host_read_commands[1]);
1432    smart->host_write_commands_lo = le64_to_cpu(log.host_write_commands[0]);
1433    smart->host_write_commands_hi = le64_to_cpu(log.host_write_commands[1]);
1434    smart->controller_busy_time_lo = le64_to_cpu(log.controller_busy_time[0]);
1435    smart->controller_busy_time_hi = le64_to_cpu(log.controller_busy_time[1]);
1436    smart->power_cycles_lo = le64_to_cpu(log.power_cycles[0]);
1437    smart->power_cycles_hi = le64_to_cpu(log.power_cycles[1]);
1438    smart->power_on_hours_lo = le64_to_cpu(log.power_on_hours[0]);
1439    smart->power_on_hours_hi = le64_to_cpu(log.power_on_hours[1]);
1440    smart->unsafe_shutdowns_lo = le64_to_cpu(log.unsafe_shutdowns[0]);
1441    smart->unsafe_shutdowns_hi = le64_to_cpu(log.unsafe_shutdowns[1]);
1442    smart->media_errors_lo = le64_to_cpu(log.media_errors[0]);
1443    smart->media_errors_hi = le64_to_cpu(log.media_errors[1]);
1444    smart->number_of_error_log_entries_lo =
1445        le64_to_cpu(log.number_of_error_log_entries[0]);
1446    smart->number_of_error_log_entries_hi =
1447        le64_to_cpu(log.number_of_error_log_entries[1]);
1448
1449    close(fd);
1450}
1451
1452static void get_disk_smart(GuestDiskInfo *disk)
1453{
1454    if (disk->address
1455        && (disk->address->bus_type == GUEST_DISK_BUS_TYPE_NVME)) {
1456        get_nvme_smart(disk);
1457    }
1458}
1459
1460GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
1461{
1462    GuestDiskInfoList *ret = NULL;
1463    GuestDiskInfo *disk;
1464    DIR *dp = NULL;
1465    struct dirent *de = NULL;
1466
1467    g_debug("listing /sys/block directory");
1468    dp = opendir("/sys/block");
1469    if (dp == NULL) {
1470        error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
1471        return NULL;
1472    }
1473    while ((de = readdir(dp)) != NULL) {
1474        g_autofree char *disk_dir = NULL, *line = NULL,
1475            *size_path = NULL;
1476        char *dev_name;
1477        Error *local_err = NULL;
1478        if (de->d_type != DT_LNK) {
1479            g_debug("  skipping entry: %s", de->d_name);
1480            continue;
1481        }
1482
1483        /* Check size and skip zero-sized disks */
1484        g_debug("  checking disk size");
1485        size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1486        if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1487            g_debug("  failed to read disk size");
1488            continue;
1489        }
1490        if (g_strcmp0(line, "0\n") == 0) {
1491            g_debug("  skipping zero-sized disk");
1492            continue;
1493        }
1494
1495        g_debug("  adding %s", de->d_name);
1496        disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1497        dev_name = get_device_for_syspath(disk_dir);
1498        if (dev_name == NULL) {
1499            g_debug("Failed to get device name for syspath: %s",
1500                disk_dir);
1501            continue;
1502        }
1503        disk = g_new0(GuestDiskInfo, 1);
1504        disk->name = dev_name;
1505        disk->partition = false;
1506        disk->alias = get_alias_for_syspath(disk_dir);
1507        QAPI_LIST_PREPEND(ret, disk);
1508
1509        /* Get address for non-virtual devices */
1510        bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1511        if (local_err != NULL) {
1512            g_debug("  failed to check disk path, ignoring error: %s",
1513                error_get_pretty(local_err));
1514            error_free(local_err);
1515            local_err = NULL;
1516            /* Don't try to get the address */
1517            is_virtual = true;
1518        }
1519        if (!is_virtual) {
1520            disk->address = get_disk_address(disk_dir, &local_err);
1521            if (local_err != NULL) {
1522                g_debug("  failed to get device info, ignoring error: %s",
1523                    error_get_pretty(local_err));
1524                error_free(local_err);
1525                local_err = NULL;
1526            }
1527        }
1528
1529        get_disk_deps(disk_dir, disk);
1530        get_disk_smart(disk);
1531        ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1532    }
1533
1534    closedir(dp);
1535
1536    return ret;
1537}
1538
1539#else
1540
/*
 * Fallback used when CONFIG_LIBUDEV is not defined: disk enumeration is
 * not available, so the command always fails with QERR_UNSUPPORTED and
 * returns NULL.
 */
GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
{
    error_setg(errp, QERR_UNSUPPORTED);
    return NULL;
}
1546
1547#endif
1548
1549/* Return a list of the disk device(s)' info which @mount lies on */
1550static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1551                                               Error **errp)
1552{
1553    GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1554    struct statvfs buf;
1555    unsigned long used, nonroot_total, fr_size;
1556    char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1557                                    mount->devmajor, mount->devminor);
1558
1559    fs->mountpoint = g_strdup(mount->dirname);
1560    fs->type = g_strdup(mount->devtype);
1561    build_guest_fsinfo_for_device(devpath, fs, errp);
1562
1563    if (statvfs(fs->mountpoint, &buf) == 0) {
1564        fr_size = buf.f_frsize;
1565        used = buf.f_blocks - buf.f_bfree;
1566        nonroot_total = used + buf.f_bavail;
1567        fs->used_bytes = used * fr_size;
1568        fs->total_bytes = nonroot_total * fr_size;
1569
1570        fs->has_total_bytes = true;
1571        fs->has_used_bytes = true;
1572    }
1573
1574    g_free(devpath);
1575
1576    return fs;
1577}
1578
1579GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1580{
1581    FsMountList mounts;
1582    struct FsMount *mount;
1583    GuestFilesystemInfoList *ret = NULL;
1584    Error *local_err = NULL;
1585
1586    QTAILQ_INIT(&mounts);
1587    if (!build_fs_mount_list(&mounts, &local_err)) {
1588        error_propagate(errp, local_err);
1589        return NULL;
1590    }
1591
1592    QTAILQ_FOREACH(mount, &mounts, next) {
1593        g_debug("Building guest fsinfo for '%s'", mount->dirname);
1594
1595        QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1596        if (local_err) {
1597            error_propagate(errp, local_err);
1598            qapi_free_GuestFilesystemInfoList(ret);
1599            ret = NULL;
1600            break;
1601        }
1602    }
1603
1604    free_fs_mount_list(&mounts);
1605    return ret;
1606}
1607#endif /* CONFIG_FSFREEZE */
1608
1609#if defined(CONFIG_FSTRIM)
1610/*
1611 * Walk list of mounted file systems in the guest, and trim them.
1612 */
1613GuestFilesystemTrimResponse *
1614qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1615{
1616    GuestFilesystemTrimResponse *response;
1617    GuestFilesystemTrimResult *result;
1618    int ret = 0;
1619    FsMountList mounts;
1620    struct FsMount *mount;
1621    int fd;
1622    struct fstrim_range r;
1623
1624    slog("guest-fstrim called");
1625
1626    QTAILQ_INIT(&mounts);
1627    if (!build_fs_mount_list(&mounts, errp)) {
1628        return NULL;
1629    }
1630
1631    response = g_malloc0(sizeof(*response));
1632
1633    QTAILQ_FOREACH(mount, &mounts, next) {
1634        result = g_malloc0(sizeof(*result));
1635        result->path = g_strdup(mount->dirname);
1636
1637        QAPI_LIST_PREPEND(response->paths, result);
1638
1639        fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
1640        if (fd == -1) {
1641            result->error = g_strdup_printf("failed to open: %s",
1642                                            strerror(errno));
1643            continue;
1644        }
1645
1646        /* We try to cull filesystems we know won't work in advance, but other
1647         * filesystems may not implement fstrim for less obvious reasons.
1648         * These will report EOPNOTSUPP; while in some other cases ENOTTY
1649         * will be reported (e.g. CD-ROMs).
1650         * Any other error means an unexpected error.
1651         */
1652        r.start = 0;
1653        r.len = -1;
1654        r.minlen = has_minimum ? minimum : 0;
1655        ret = ioctl(fd, FITRIM, &r);
1656        if (ret == -1) {
1657            if (errno == ENOTTY || errno == EOPNOTSUPP) {
1658                result->error = g_strdup("trim not supported");
1659            } else {
1660                result->error = g_strdup_printf("failed to trim: %s",
1661                                                strerror(errno));
1662            }
1663            close(fd);
1664            continue;
1665        }
1666
1667        result->has_minimum = true;
1668        result->minimum = r.minlen;
1669        result->has_trimmed = true;
1670        result->trimmed = r.len;
1671        close(fd);
1672    }
1673
1674    free_fs_mount_list(&mounts);
1675    return response;
1676}
1677#endif /* CONFIG_FSTRIM */
1678
1679
/* sysfs file read to probe, and written to enter, a kernel suspend state */
#define LINUX_SYS_STATE_FILE "/sys/power/state"
/* Exit status of 'pm-is-supported' that is accepted as "mode supported" */
#define SUSPEND_SUPPORTED 0
/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably the matching "not supported" exit status -- confirm.
 */
#define SUSPEND_NOT_SUPPORTED 1

/*
 * Suspend modes handled by the suspend helpers below.
 *
 * The numeric values are used as indices into the per-mode string tables
 * of those helpers (e.g. {"disk", "mem", NULL}), so the order here and the
 * order of those tables must stay in sync.
 */
typedef enum {
    SUSPEND_MODE_DISK = 0,
    SUSPEND_MODE_RAM = 1,
    SUSPEND_MODE_HYBRID = 2,
} SuspendMode;
1689
1690/*
1691 * Executes a command in a child process using g_spawn_sync,
1692 * returning an int >= 0 representing the exit status of the
1693 * process.
1694 *
1695 * If the program wasn't found in path, returns -1.
1696 *
1697 * If a problem happened when creating the child process,
1698 * returns -1 and errp is set.
1699 */
1700static int run_process_child(const char *command[], Error **errp)
1701{
1702    int exit_status, spawn_flag;
1703    GError *g_err = NULL;
1704    bool success;
1705
1706    spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1707                 G_SPAWN_STDERR_TO_DEV_NULL;
1708
1709    success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1710                            NULL, NULL, NULL, NULL,
1711                            &exit_status, &g_err);
1712
1713    if (success) {
1714        return WEXITSTATUS(exit_status);
1715    }
1716
1717    if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1718        error_setg(errp, "failed to create child process, error '%s'",
1719                   g_err->message);
1720    }
1721
1722    g_error_free(g_err);
1723    return -1;
1724}
1725
1726static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1727{
1728    const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1729                                     "systemd-hybrid-sleep"};
1730    const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1731    int status;
1732
1733    status = run_process_child(cmd, errp);
1734
1735    /*
1736     * systemctl status uses LSB return codes so we can expect
1737     * status > 0 and be ok. To assert if the guest has support
1738     * for the selected suspend mode, status should be < 4. 4 is
1739     * the code for unknown service status, the return value when
1740     * the service does not exist. A common value is status = 3
1741     * (program is not running).
1742     */
1743    if (status > 0 && status < 4) {
1744        return true;
1745    }
1746
1747    return false;
1748}
1749
1750static void systemd_suspend(SuspendMode mode, Error **errp)
1751{
1752    Error *local_err = NULL;
1753    const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
1754    const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
1755    int status;
1756
1757    status = run_process_child(cmd, &local_err);
1758
1759    if (status == 0) {
1760        return;
1761    }
1762
1763    if ((status == -1) && !local_err) {
1764        error_setg(errp, "the helper program 'systemctl %s' was not found",
1765                   systemctl_args[mode]);
1766        return;
1767    }
1768
1769    if (local_err) {
1770        error_propagate(errp, local_err);
1771    } else {
1772        error_setg(errp, "the helper program 'systemctl %s' returned an "
1773                   "unexpected exit status code (%d)",
1774                   systemctl_args[mode], status);
1775    }
1776}
1777
1778static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
1779{
1780    Error *local_err = NULL;
1781    const char *pmutils_args[3] = {"--hibernate", "--suspend",
1782                                   "--suspend-hybrid"};
1783    const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
1784    int status;
1785
1786    status = run_process_child(cmd, &local_err);
1787
1788    if (status == SUSPEND_SUPPORTED) {
1789        return true;
1790    }
1791
1792    if ((status == -1) && !local_err) {
1793        return false;
1794    }
1795
1796    if (local_err) {
1797        error_propagate(errp, local_err);
1798    } else {
1799        error_setg(errp,
1800                   "the helper program '%s' returned an unexpected exit"
1801                   " status code (%d)", "pm-is-supported", status);
1802    }
1803
1804    return false;
1805}
1806
1807static void pmutils_suspend(SuspendMode mode, Error **errp)
1808{
1809    Error *local_err = NULL;
1810    const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
1811                                       "pm-suspend-hybrid"};
1812    const char *cmd[2] = {pmutils_binaries[mode], NULL};
1813    int status;
1814
1815    status = run_process_child(cmd, &local_err);
1816
1817    if (status == 0) {
1818        return;
1819    }
1820
1821    if ((status == -1) && !local_err) {
1822        error_setg(errp, "the helper program '%s' was not found",
1823                   pmutils_binaries[mode]);
1824        return;
1825    }
1826
1827    if (local_err) {
1828        error_propagate(errp, local_err);
1829    } else {
1830        error_setg(errp,
1831                   "the helper program '%s' returned an unexpected exit"
1832                   " status code (%d)", pmutils_binaries[mode], status);
1833    }
1834}
1835
1836static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
1837{
1838    const char *sysfile_strs[3] = {"disk", "mem", NULL};
1839    const char *sysfile_str = sysfile_strs[mode];
1840    char buf[32]; /* hopefully big enough */
1841    int fd;
1842    ssize_t ret;
1843
1844    if (!sysfile_str) {
1845        error_setg(errp, "unknown guest suspend mode");
1846        return false;
1847    }
1848
1849    fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1850    if (fd < 0) {
1851        return false;
1852    }
1853
1854    ret = read(fd, buf, sizeof(buf) - 1);
1855    close(fd);
1856    if (ret <= 0) {
1857        return false;
1858    }
1859    buf[ret] = '\0';
1860
1861    if (strstr(buf, sysfile_str)) {
1862        return true;
1863    }
1864    return false;
1865}
1866
1867static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
1868{
1869    Error *local_err = NULL;
1870    const char *sysfile_strs[3] = {"disk", "mem", NULL};
1871    const char *sysfile_str = sysfile_strs[mode];
1872    pid_t pid;
1873    int status;
1874
1875    if (!sysfile_str) {
1876        error_setg(errp, "unknown guest suspend mode");
1877        return;
1878    }
1879
1880    pid = fork();
1881    if (!pid) {
1882        /* child */
1883        int fd;
1884
1885        setsid();
1886        reopen_fd_to_null(0);
1887        reopen_fd_to_null(1);
1888        reopen_fd_to_null(2);
1889
1890        fd = open(LINUX_SYS_STATE_FILE, O_WRONLY);
1891        if (fd < 0) {
1892            _exit(EXIT_FAILURE);
1893        }
1894
1895        if (write(fd, sysfile_str, strlen(sysfile_str)) < 0) {
1896            _exit(EXIT_FAILURE);
1897        }
1898
1899        _exit(EXIT_SUCCESS);
1900    } else if (pid < 0) {
1901        error_setg_errno(errp, errno, "failed to create child process");
1902        return;
1903    }
1904
1905    ga_wait_child(pid, &status, &local_err);
1906    if (local_err) {
1907        error_propagate(errp, local_err);
1908        return;
1909    }
1910
1911    if (WEXITSTATUS(status)) {
1912        error_setg(errp, "child process has failed to suspend");
1913    }
1914
1915}
1916
1917static void guest_suspend(SuspendMode mode, Error **errp)
1918{
1919    Error *local_err = NULL;
1920    bool mode_supported = false;
1921
1922    if (systemd_supports_mode(mode, &local_err)) {
1923        mode_supported = true;
1924        systemd_suspend(mode, &local_err);
1925
1926        if (!local_err) {
1927            return;
1928        }
1929    }
1930
1931    error_free(local_err);
1932    local_err = NULL;
1933
1934    if (pmutils_supports_mode(mode, &local_err)) {
1935        mode_supported = true;
1936        pmutils_suspend(mode, &local_err);
1937
1938        if (!local_err) {
1939            return;
1940        }
1941    }
1942
1943    error_free(local_err);
1944    local_err = NULL;
1945
1946    if (linux_sys_state_supports_mode(mode, &local_err)) {
1947        mode_supported = true;
1948        linux_sys_state_suspend(mode, &local_err);
1949    }
1950
1951    if (!mode_supported) {
1952        error_free(local_err);
1953        error_setg(errp,
1954                   "the requested suspend mode is not supported by the guest");
1955    } else {
1956        error_propagate(errp, local_err);
1957    }
1958}
1959
/* Command handler: suspend the guest using SUSPEND_MODE_DISK. */
void qmp_guest_suspend_disk(Error **errp)
{
    guest_suspend(SUSPEND_MODE_DISK, errp);
}
1964
/* Command handler: suspend the guest using SUSPEND_MODE_RAM. */
void qmp_guest_suspend_ram(Error **errp)
{
    guest_suspend(SUSPEND_MODE_RAM, errp);
}
1969
/* Command handler: suspend the guest using SUSPEND_MODE_HYBRID. */
void qmp_guest_suspend_hybrid(Error **errp)
{
    guest_suspend(SUSPEND_MODE_HYBRID, errp);
}
1974
1975/* Transfer online/offline status between @vcpu and the guest system.
1976 *
1977 * On input either @errp or *@errp must be NULL.
1978 *
1979 * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1980 * - R: vcpu->logical_id
1981 * - W: vcpu->online
1982 * - W: vcpu->can_offline
1983 *
1984 * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1985 * - R: vcpu->logical_id
1986 * - R: vcpu->online
1987 *
1988 * Written members remain unmodified on error.
1989 */
1990static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1991                          char *dirpath, Error **errp)
1992{
1993    int fd;
1994    int res;
1995    int dirfd;
1996    static const char fn[] = "online";
1997
1998    dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1999    if (dirfd == -1) {
2000        error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2001        return;
2002    }
2003
2004    fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
2005    if (fd == -1) {
2006        if (errno != ENOENT) {
2007            error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
2008        } else if (sys2vcpu) {
2009            vcpu->online = true;
2010            vcpu->can_offline = false;
2011        } else if (!vcpu->online) {
2012            error_setg(errp, "logical processor #%" PRId64 " can't be "
2013                       "offlined", vcpu->logical_id);
2014        } /* otherwise pretend successful re-onlining */
2015    } else {
2016        unsigned char status;
2017
2018        res = pread(fd, &status, 1, 0);
2019        if (res == -1) {
2020            error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
2021        } else if (res == 0) {
2022            error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
2023                       fn);
2024        } else if (sys2vcpu) {
2025            vcpu->online = (status != '0');
2026            vcpu->can_offline = true;
2027        } else if (vcpu->online != (status != '0')) {
2028            status = '0' + vcpu->online;
2029            if (pwrite(fd, &status, 1, 0) == -1) {
2030                error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
2031                                 fn);
2032            }
2033        } /* otherwise pretend successful re-(on|off)-lining */
2034
2035        res = close(fd);
2036        g_assert(res == 0);
2037    }
2038
2039    res = close(dirfd);
2040    g_assert(res == 0);
2041}
2042
2043GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2044{
2045    GuestLogicalProcessorList *head, **tail;
2046    const char *cpu_dir = "/sys/devices/system/cpu";
2047    const gchar *line;
2048    g_autoptr(GDir) cpu_gdir = NULL;
2049    Error *local_err = NULL;
2050
2051    head = NULL;
2052    tail = &head;
2053    cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
2054
2055    if (cpu_gdir == NULL) {
2056        error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
2057        return NULL;
2058    }
2059
2060    while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
2061        GuestLogicalProcessor *vcpu;
2062        int64_t id;
2063        if (sscanf(line, "cpu%" PRId64, &id)) {
2064            g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
2065                                                    "cpu%" PRId64 "/", id);
2066            vcpu = g_malloc0(sizeof *vcpu);
2067            vcpu->logical_id = id;
2068            vcpu->has_can_offline = true; /* lolspeak ftw */
2069            transfer_vcpu(vcpu, true, path, &local_err);
2070            QAPI_LIST_APPEND(tail, vcpu);
2071        }
2072    }
2073
2074    if (local_err == NULL) {
2075        /* there's no guest with zero VCPUs */
2076        g_assert(head != NULL);
2077        return head;
2078    }
2079
2080    qapi_free_GuestLogicalProcessorList(head);
2081    error_propagate(errp, local_err);
2082    return NULL;
2083}
2084
2085int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2086{
2087    int64_t processed;
2088    Error *local_err = NULL;
2089
2090    processed = 0;
2091    while (vcpus != NULL) {
2092        char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
2093                                     vcpus->value->logical_id);
2094
2095        transfer_vcpu(vcpus->value, false, path, &local_err);
2096        g_free(path);
2097        if (local_err != NULL) {
2098            break;
2099        }
2100        ++processed;
2101        vcpus = vcpus->next;
2102    }
2103
2104    if (local_err != NULL) {
2105        if (processed == 0) {
2106            error_propagate(errp, local_err);
2107        } else {
2108            error_free(local_err);
2109        }
2110    }
2111
2112    return processed;
2113}
2114#endif /* __linux__ */
2115
2116#if defined(__linux__) || defined(__FreeBSD__)
2117void qmp_guest_set_user_password(const char *username,
2118                                 const char *password,
2119                                 bool crypted,
2120                                 Error **errp)
2121{
2122    Error *local_err = NULL;
2123    char *passwd_path = NULL;
2124    pid_t pid;
2125    int status;
2126    int datafd[2] = { -1, -1 };
2127    char *rawpasswddata = NULL;
2128    size_t rawpasswdlen;
2129    char *chpasswddata = NULL;
2130    size_t chpasswdlen;
2131
2132    rawpasswddata = (char *)qbase64_decode(password, -1, &rawpasswdlen, errp);
2133    if (!rawpasswddata) {
2134        return;
2135    }
2136    rawpasswddata = g_renew(char, rawpasswddata, rawpasswdlen + 1);
2137    rawpasswddata[rawpasswdlen] = '\0';
2138
2139    if (strchr(rawpasswddata, '\n')) {
2140        error_setg(errp, "forbidden characters in raw password");
2141        goto out;
2142    }
2143
2144    if (strchr(username, '\n') ||
2145        strchr(username, ':')) {
2146        error_setg(errp, "forbidden characters in username");
2147        goto out;
2148    }
2149
2150#ifdef __FreeBSD__
2151    chpasswddata = g_strdup(rawpasswddata);
2152    passwd_path = g_find_program_in_path("pw");
2153#else
2154    chpasswddata = g_strdup_printf("%s:%s\n", username, rawpasswddata);
2155    passwd_path = g_find_program_in_path("chpasswd");
2156#endif
2157
2158    chpasswdlen = strlen(chpasswddata);
2159
2160    if (!passwd_path) {
2161        error_setg(errp, "cannot find 'passwd' program in PATH");
2162        goto out;
2163    }
2164
2165    if (!g_unix_open_pipe(datafd, FD_CLOEXEC, NULL)) {
2166        error_setg(errp, "cannot create pipe FDs");
2167        goto out;
2168    }
2169
2170    pid = fork();
2171    if (pid == 0) {
2172        close(datafd[1]);
2173        /* child */
2174        setsid();
2175        dup2(datafd[0], 0);
2176        reopen_fd_to_null(1);
2177        reopen_fd_to_null(2);
2178
2179#ifdef __FreeBSD__
2180        const char *h_arg;
2181        h_arg = (crypted) ? "-H" : "-h";
2182        execl(passwd_path, "pw", "usermod", "-n", username, h_arg, "0", NULL);
2183#else
2184        if (crypted) {
2185            execl(passwd_path, "chpasswd", "-e", NULL);
2186        } else {
2187            execl(passwd_path, "chpasswd", NULL);
2188        }
2189#endif
2190        _exit(EXIT_FAILURE);
2191    } else if (pid < 0) {
2192        error_setg_errno(errp, errno, "failed to create child process");
2193        goto out;
2194    }
2195    close(datafd[0]);
2196    datafd[0] = -1;
2197
2198    if (qemu_write_full(datafd[1], chpasswddata, chpasswdlen) != chpasswdlen) {
2199        error_setg_errno(errp, errno, "cannot write new account password");
2200        goto out;
2201    }
2202    close(datafd[1]);
2203    datafd[1] = -1;
2204
2205    ga_wait_child(pid, &status, &local_err);
2206    if (local_err) {
2207        error_propagate(errp, local_err);
2208        goto out;
2209    }
2210
2211    if (!WIFEXITED(status)) {
2212        error_setg(errp, "child process has terminated abnormally");
2213        goto out;
2214    }
2215
2216    if (WEXITSTATUS(status)) {
2217        error_setg(errp, "child process has failed to set user password");
2218        goto out;
2219    }
2220
2221out:
2222    g_free(chpasswddata);
2223    g_free(rawpasswddata);
2224    g_free(passwd_path);
2225    if (datafd[0] != -1) {
2226        close(datafd[0]);
2227    }
2228    if (datafd[1] != -1) {
2229        close(datafd[1]);
2230    }
2231}
2232#else /* __linux__ || __FreeBSD__ */
2233void qmp_guest_set_user_password(const char *username,
2234                                 const char *password,
2235                                 bool crypted,
2236                                 Error **errp)
2237{
2238    error_setg(errp, QERR_UNSUPPORTED);
2239}
2240#endif /* __linux__ || __FreeBSD__ */
2241
2242#ifdef __linux__
2243static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
2244                               int size, Error **errp)
2245{
2246    int fd;
2247    int res;
2248
2249    errno = 0;
2250    fd = openat(dirfd, pathname, O_RDONLY);
2251    if (fd == -1) {
2252        error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2253        return;
2254    }
2255
2256    res = pread(fd, buf, size, 0);
2257    if (res == -1) {
2258        error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname);
2259    } else if (res == 0) {
2260        error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
2261    }
2262    close(fd);
2263}
2264
2265static void ga_write_sysfs_file(int dirfd, const char *pathname,
2266                                const char *buf, int size, Error **errp)
2267{
2268    int fd;
2269
2270    errno = 0;
2271    fd = openat(dirfd, pathname, O_WRONLY);
2272    if (fd == -1) {
2273        error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
2274        return;
2275    }
2276
2277    if (pwrite(fd, buf, size, 0) == -1) {
2278        error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname);
2279    }
2280
2281    close(fd);
2282}
2283
2284/* Transfer online/offline status between @mem_blk and the guest system.
2285 *
2286 * On input either @errp or *@errp must be NULL.
2287 *
2288 * In system-to-@mem_blk direction, the following @mem_blk fields are accessed:
2289 * - R: mem_blk->phys_index
2290 * - W: mem_blk->online
2291 * - W: mem_blk->can_offline
2292 *
2293 * In @mem_blk-to-system direction, the following @mem_blk fields are accessed:
2294 * - R: mem_blk->phys_index
2295 * - R: mem_blk->online
2296 *-  R: mem_blk->can_offline
2297 * Written members remain unmodified on error.
2298 */
2299static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
2300                                  GuestMemoryBlockResponse *result,
2301                                  Error **errp)
2302{
2303    char *dirpath;
2304    int dirfd;
2305    char *status;
2306    Error *local_err = NULL;
2307
2308    if (!sys2memblk) {
2309        DIR *dp;
2310
2311        if (!result) {
2312            error_setg(errp, "Internal error, 'result' should not be NULL");
2313            return;
2314        }
2315        errno = 0;
2316        dp = opendir("/sys/devices/system/memory/");
2317         /* if there is no 'memory' directory in sysfs,
2318         * we think this VM does not support online/offline memory block,
2319         * any other solution?
2320         */
2321        if (!dp) {
2322            if (errno == ENOENT) {
2323                result->response =
2324                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2325            }
2326            goto out1;
2327        }
2328        closedir(dp);
2329    }
2330
2331    dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
2332                              mem_blk->phys_index);
2333    dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2334    if (dirfd == -1) {
2335        if (sys2memblk) {
2336            error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2337        } else {
2338            if (errno == ENOENT) {
2339                result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
2340            } else {
2341                result->response =
2342                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2343            }
2344        }
2345        g_free(dirpath);
2346        goto out1;
2347    }
2348    g_free(dirpath);
2349
2350    status = g_malloc0(10);
2351    ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
2352    if (local_err) {
2353        /* treat with sysfs file that not exist in old kernel */
2354        if (errno == ENOENT) {
2355            error_free(local_err);
2356            if (sys2memblk) {
2357                mem_blk->online = true;
2358                mem_blk->can_offline = false;
2359            } else if (!mem_blk->online) {
2360                result->response =
2361                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
2362            }
2363        } else {
2364            if (sys2memblk) {
2365                error_propagate(errp, local_err);
2366            } else {
2367                error_free(local_err);
2368                result->response =
2369                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2370            }
2371        }
2372        goto out2;
2373    }
2374
2375    if (sys2memblk) {
2376        char removable = '0';
2377
2378        mem_blk->online = (strncmp(status, "online", 6) == 0);
2379
2380        ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
2381        if (local_err) {
2382            /* if no 'removable' file, it doesn't support offline mem blk */
2383            if (errno == ENOENT) {
2384                error_free(local_err);
2385                mem_blk->can_offline = false;
2386            } else {
2387                error_propagate(errp, local_err);
2388            }
2389        } else {
2390            mem_blk->can_offline = (removable != '0');
2391        }
2392    } else {
2393        if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
2394            const char *new_state = mem_blk->online ? "online" : "offline";
2395
2396            ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
2397                                &local_err);
2398            if (local_err) {
2399                error_free(local_err);
2400                result->response =
2401                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
2402                goto out2;
2403            }
2404
2405            result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
2406            result->has_error_code = false;
2407        } /* otherwise pretend successful re-(on|off)-lining */
2408    }
2409    g_free(status);
2410    close(dirfd);
2411    return;
2412
2413out2:
2414    g_free(status);
2415    close(dirfd);
2416out1:
2417    if (!sys2memblk) {
2418        result->has_error_code = true;
2419        result->error_code = errno;
2420    }
2421}
2422
2423GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2424{
2425    GuestMemoryBlockList *head, **tail;
2426    Error *local_err = NULL;
2427    struct dirent *de;
2428    DIR *dp;
2429
2430    head = NULL;
2431    tail = &head;
2432
2433    dp = opendir("/sys/devices/system/memory/");
2434    if (!dp) {
2435        /* it's ok if this happens to be a system that doesn't expose
2436         * memory blocks via sysfs, but otherwise we should report
2437         * an error
2438         */
2439        if (errno != ENOENT) {
2440            error_setg_errno(errp, errno, "Can't open directory"
2441                             "\"/sys/devices/system/memory/\"");
2442        }
2443        return NULL;
2444    }
2445
2446    /* Note: the phys_index of memory block may be discontinuous,
2447     * this is because a memblk is the unit of the Sparse Memory design, which
2448     * allows discontinuous memory ranges (ex. NUMA), so here we should
2449     * traverse the memory block directory.
2450     */
2451    while ((de = readdir(dp)) != NULL) {
2452        GuestMemoryBlock *mem_blk;
2453
2454        if ((strncmp(de->d_name, "memory", 6) != 0) ||
2455            !(de->d_type & DT_DIR)) {
2456            continue;
2457        }
2458
2459        mem_blk = g_malloc0(sizeof *mem_blk);
2460        /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
2461        mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
2462        mem_blk->has_can_offline = true; /* lolspeak ftw */
2463        transfer_memory_block(mem_blk, true, NULL, &local_err);
2464        if (local_err) {
2465            break;
2466        }
2467
2468        QAPI_LIST_APPEND(tail, mem_blk);
2469    }
2470
2471    closedir(dp);
2472    if (local_err == NULL) {
2473        /* there's no guest with zero memory blocks */
2474        if (head == NULL) {
2475            error_setg(errp, "guest reported zero memory blocks!");
2476        }
2477        return head;
2478    }
2479
2480    qapi_free_GuestMemoryBlockList(head);
2481    error_propagate(errp, local_err);
2482    return NULL;
2483}
2484
2485GuestMemoryBlockResponseList *
2486qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2487{
2488    GuestMemoryBlockResponseList *head, **tail;
2489    Error *local_err = NULL;
2490
2491    head = NULL;
2492    tail = &head;
2493
2494    while (mem_blks != NULL) {
2495        GuestMemoryBlockResponse *result;
2496        GuestMemoryBlock *current_mem_blk = mem_blks->value;
2497
2498        result = g_malloc0(sizeof(*result));
2499        result->phys_index = current_mem_blk->phys_index;
2500        transfer_memory_block(current_mem_blk, false, result, &local_err);
2501        if (local_err) { /* should never happen */
2502            goto err;
2503        }
2504
2505        QAPI_LIST_APPEND(tail, result);
2506        mem_blks = mem_blks->next;
2507    }
2508
2509    return head;
2510err:
2511    qapi_free_GuestMemoryBlockResponseList(head);
2512    error_propagate(errp, local_err);
2513    return NULL;
2514}
2515
2516GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2517{
2518    Error *local_err = NULL;
2519    char *dirpath;
2520    int dirfd;
2521    char *buf;
2522    GuestMemoryBlockInfo *info;
2523
2524    dirpath = g_strdup_printf("/sys/devices/system/memory/");
2525    dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
2526    if (dirfd == -1) {
2527        error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
2528        g_free(dirpath);
2529        return NULL;
2530    }
2531    g_free(dirpath);
2532
2533    buf = g_malloc0(20);
2534    ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
2535    close(dirfd);
2536    if (local_err) {
2537        g_free(buf);
2538        error_propagate(errp, local_err);
2539        return NULL;
2540    }
2541
2542    info = g_new0(GuestMemoryBlockInfo, 1);
2543    info->size = strtol(buf, NULL, 16); /* the unit is bytes */
2544
2545    g_free(buf);
2546
2547    return info;
2548}
2549
2550#define MAX_NAME_LEN 128
2551static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
2552{
2553#ifdef CONFIG_LINUX
2554    GuestDiskStatsInfoList *head = NULL, **tail = &head;
2555    const char *diskstats = "/proc/diskstats";
2556    FILE *fp;
2557    size_t n;
2558    char *line = NULL;
2559
2560    fp = fopen(diskstats, "r");
2561    if (fp  == NULL) {
2562        error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
2563        return NULL;
2564    }
2565
2566    while (getline(&line, &n, fp) != -1) {
2567        g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
2568        g_autofree GuestDiskStats *diskstat = NULL;
2569        char dev_name[MAX_NAME_LEN];
2570        unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, fl_ticks;
2571        unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
2572        unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
2573        unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
2574        unsigned int major, minor;
2575        int i;
2576
2577        i = sscanf(line, "%u %u %s %lu %lu %lu"
2578                   "%lu %lu %lu %lu %u %u %u %u"
2579                   "%lu %lu %lu %u %lu %u",
2580                   &major, &minor, dev_name,
2581                   &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
2582                   &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
2583                   &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
2584                   &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
2585                   &fl_ios, &fl_ticks);
2586
2587        if (i < 7) {
2588            continue;
2589        }
2590
2591        diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
2592        diskstatinfo->name = g_strdup(dev_name);
2593        diskstatinfo->major = major;
2594        diskstatinfo->minor = minor;
2595
2596        diskstat = g_new0(GuestDiskStats, 1);
2597        if (i == 7) {
2598            diskstat->has_read_ios = true;
2599            diskstat->read_ios = rd_ios;
2600            diskstat->has_read_sectors = true;
2601            diskstat->read_sectors = rd_merges_or_rd_sec;
2602            diskstat->has_write_ios = true;
2603            diskstat->write_ios = rd_sec_or_wr_ios;
2604            diskstat->has_write_sectors = true;
2605            diskstat->write_sectors = rd_ticks_or_wr_sec;
2606        }
2607        if (i >= 14) {
2608            diskstat->has_read_ios = true;
2609            diskstat->read_ios = rd_ios;
2610            diskstat->has_read_sectors = true;
2611            diskstat->read_sectors = rd_sec_or_wr_ios;
2612            diskstat->has_read_merges = true;
2613            diskstat->read_merges = rd_merges_or_rd_sec;
2614            diskstat->has_read_ticks = true;
2615            diskstat->read_ticks = rd_ticks_or_wr_sec;
2616            diskstat->has_write_ios = true;
2617            diskstat->write_ios = wr_ios;
2618            diskstat->has_write_sectors = true;
2619            diskstat->write_sectors = wr_sec;
2620            diskstat->has_write_merges = true;
2621            diskstat->write_merges = wr_merges;
2622            diskstat->has_write_ticks = true;
2623            diskstat->write_ticks = wr_ticks;
2624            diskstat->has_ios_pgr = true;
2625            diskstat->ios_pgr = ios_pgr;
2626            diskstat->has_total_ticks = true;
2627            diskstat->total_ticks = tot_ticks;
2628            diskstat->has_weight_ticks = true;
2629            diskstat->weight_ticks = rq_ticks;
2630        }
2631        if (i >= 18) {
2632            diskstat->has_discard_ios = true;
2633            diskstat->discard_ios = dc_ios;
2634            diskstat->has_discard_merges = true;
2635            diskstat->discard_merges = dc_merges;
2636            diskstat->has_discard_sectors = true;
2637            diskstat->discard_sectors = dc_sec;
2638            diskstat->has_discard_ticks = true;
2639            diskstat->discard_ticks = dc_ticks;
2640        }
2641        if (i >= 20) {
2642            diskstat->has_flush_ios = true;
2643            diskstat->flush_ios = fl_ios;
2644            diskstat->has_flush_ticks = true;
2645            diskstat->flush_ticks = fl_ticks;
2646        }
2647
2648        diskstatinfo->stats = g_steal_pointer(&diskstat);
2649        QAPI_LIST_APPEND(tail, diskstatinfo);
2650        diskstatinfo = NULL;
2651    }
2652    free(line);
2653    fclose(fp);
2654    return head;
2655#else
2656    g_debug("disk stats reporting available only for Linux");
2657    return NULL;
2658#endif
2659}
2660
2661GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
2662{
2663    return guest_get_diskstats(errp);
2664}
2665
2666GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
2667{
2668    GuestCpuStatsList *head = NULL, **tail = &head;
2669    const char *cpustats = "/proc/stat";
2670    int clk_tck = sysconf(_SC_CLK_TCK);
2671    FILE *fp;
2672    size_t n;
2673    char *line = NULL;
2674
2675    fp = fopen(cpustats, "r");
2676    if (fp  == NULL) {
2677        error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
2678        return NULL;
2679    }
2680
2681    while (getline(&line, &n, fp) != -1) {
2682        GuestCpuStats *cpustat = NULL;
2683        GuestLinuxCpuStats *linuxcpustat;
2684        int i;
2685        unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
2686        unsigned long nice, guest_nice;
2687        char name[64];
2688
2689        i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
2690                   name, &user, &nice, &system, &idle, &iowait, &irq, &softirq,
2691                   &steal, &guest, &guest_nice);
2692
2693        /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
2694        if ((i == EOF) || strncmp(name, "cpu", 3) || (name[3] == '\0')) {
2695            continue;
2696        }
2697
2698        if (i < 5) {
2699            slog("Parsing cpu stat from %s failed, see \"man proc\"", cpustats);
2700            break;
2701        }
2702
2703        cpustat = g_new0(GuestCpuStats, 1);
2704        cpustat->type = GUEST_CPU_STATS_TYPE_LINUX;
2705
2706        linuxcpustat = &cpustat->u.q_linux;
2707        linuxcpustat->cpu = atoi(&name[3]);
2708        linuxcpustat->user = user * 1000 / clk_tck;
2709        linuxcpustat->nice = nice * 1000 / clk_tck;
2710        linuxcpustat->system = system * 1000 / clk_tck;
2711        linuxcpustat->idle = idle * 1000 / clk_tck;
2712
2713        if (i > 5) {
2714            linuxcpustat->has_iowait = true;
2715            linuxcpustat->iowait = iowait * 1000 / clk_tck;
2716        }
2717
2718        if (i > 6) {
2719            linuxcpustat->has_irq = true;
2720            linuxcpustat->irq = irq * 1000 / clk_tck;
2721            linuxcpustat->has_softirq = true;
2722            linuxcpustat->softirq = softirq * 1000 / clk_tck;
2723        }
2724
2725        if (i > 8) {
2726            linuxcpustat->has_steal = true;
2727            linuxcpustat->steal = steal * 1000 / clk_tck;
2728        }
2729
2730        if (i > 9) {
2731            linuxcpustat->has_guest = true;
2732            linuxcpustat->guest = guest * 1000 / clk_tck;
2733        }
2734
2735        if (i > 10) {
2736            linuxcpustat->has_guest = true;
2737            linuxcpustat->guest = guest * 1000 / clk_tck;
2738            linuxcpustat->has_guestnice = true;
2739            linuxcpustat->guestnice = guest_nice * 1000 / clk_tck;
2740        }
2741
2742        QAPI_LIST_APPEND(tail, cpustat);
2743    }
2744
2745    free(line);
2746    fclose(fp);
2747    return head;
2748}
2749
2750#else /* defined(__linux__) */
2751
2752void qmp_guest_suspend_disk(Error **errp)
2753{
2754    error_setg(errp, QERR_UNSUPPORTED);
2755}
2756
2757void qmp_guest_suspend_ram(Error **errp)
2758{
2759    error_setg(errp, QERR_UNSUPPORTED);
2760}
2761
2762void qmp_guest_suspend_hybrid(Error **errp)
2763{
2764    error_setg(errp, QERR_UNSUPPORTED);
2765}
2766
2767GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
2768{
2769    error_setg(errp, QERR_UNSUPPORTED);
2770    return NULL;
2771}
2772
2773int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
2774{
2775    error_setg(errp, QERR_UNSUPPORTED);
2776    return -1;
2777}
2778
2779GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
2780{
2781    error_setg(errp, QERR_UNSUPPORTED);
2782    return NULL;
2783}
2784
2785GuestMemoryBlockResponseList *
2786qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
2787{
2788    error_setg(errp, QERR_UNSUPPORTED);
2789    return NULL;
2790}
2791
2792GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
2793{
2794    error_setg(errp, QERR_UNSUPPORTED);
2795    return NULL;
2796}
2797
2798#endif
2799
2800#ifdef HAVE_GETIFADDRS
2801static GuestNetworkInterface *
2802guest_find_interface(GuestNetworkInterfaceList *head,
2803                     const char *name)
2804{
2805    for (; head; head = head->next) {
2806        if (strcmp(head->value->name, name) == 0) {
2807            return head->value;
2808        }
2809    }
2810
2811    return NULL;
2812}
2813
2814static int guest_get_network_stats(const char *name,
2815                       GuestNetworkInterfaceStat *stats)
2816{
2817#ifdef CONFIG_LINUX
2818    int name_len;
2819    char const *devinfo = "/proc/net/dev";
2820    FILE *fp;
2821    char *line = NULL, *colon;
2822    size_t n = 0;
2823    fp = fopen(devinfo, "r");
2824    if (!fp) {
2825        g_debug("failed to open network stats %s: %s", devinfo,
2826                g_strerror(errno));
2827        return -1;
2828    }
2829    name_len = strlen(name);
2830    while (getline(&line, &n, fp) != -1) {
2831        long long dummy;
2832        long long rx_bytes;
2833        long long rx_packets;
2834        long long rx_errs;
2835        long long rx_dropped;
2836        long long tx_bytes;
2837        long long tx_packets;
2838        long long tx_errs;
2839        long long tx_dropped;
2840        char *trim_line;
2841        trim_line = g_strchug(line);
2842        if (trim_line[0] == '\0') {
2843            continue;
2844        }
2845        colon = strchr(trim_line, ':');
2846        if (!colon) {
2847            continue;
2848        }
2849        if (colon - name_len  == trim_line &&
2850           strncmp(trim_line, name, name_len) == 0) {
2851            if (sscanf(colon + 1,
2852                "%lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld",
2853                  &rx_bytes, &rx_packets, &rx_errs, &rx_dropped,
2854                  &dummy, &dummy, &dummy, &dummy,
2855                  &tx_bytes, &tx_packets, &tx_errs, &tx_dropped,
2856                  &dummy, &dummy, &dummy, &dummy) != 16) {
2857                continue;
2858            }
2859            stats->rx_bytes = rx_bytes;
2860            stats->rx_packets = rx_packets;
2861            stats->rx_errs = rx_errs;
2862            stats->rx_dropped = rx_dropped;
2863            stats->tx_bytes = tx_bytes;
2864            stats->tx_packets = tx_packets;
2865            stats->tx_errs = tx_errs;
2866            stats->tx_dropped = tx_dropped;
2867            fclose(fp);
2868            g_free(line);
2869            return 0;
2870        }
2871    }
2872    fclose(fp);
2873    g_free(line);
2874    g_debug("/proc/net/dev: Interface '%s' not found", name);
2875#else /* !CONFIG_LINUX */
2876    g_debug("Network stats reporting available only for Linux");
2877#endif /* !CONFIG_LINUX */
2878    return -1;
2879}
2880
2881#ifndef CONFIG_BSD
2882/*
2883 * Fill "buf" with MAC address by ifaddrs. Pointer buf must point to a
2884 * buffer with ETHER_ADDR_LEN length at least.
2885 *
2886 * Returns false in case of an error, otherwise true. "obtained" argument
2887 * is true if a MAC address was obtained successful, otherwise false.
2888 */
2889bool guest_get_hw_addr(struct ifaddrs *ifa, unsigned char *buf,
2890                       bool *obtained, Error **errp)
2891{
2892    struct ifreq ifr;
2893    int sock;
2894
2895    *obtained = false;
2896
2897    /* we haven't obtained HW address yet */
2898    sock = socket(PF_INET, SOCK_STREAM, 0);
2899    if (sock == -1) {
2900        error_setg_errno(errp, errno, "failed to create socket");
2901        return false;
2902    }
2903
2904    memset(&ifr, 0, sizeof(ifr));
2905    pstrcpy(ifr.ifr_name, IF_NAMESIZE, ifa->ifa_name);
2906    if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
2907        /*
2908         * We can't get the hw addr of this interface, but that's not a
2909         * fatal error.
2910         */
2911        if (errno == EADDRNOTAVAIL) {
2912            /* The interface doesn't have a hw addr (e.g. loopback). */
2913            g_debug("failed to get MAC address of %s: %s",
2914                    ifa->ifa_name, strerror(errno));
2915        } else{
2916            g_warning("failed to get MAC address of %s: %s",
2917                      ifa->ifa_name, strerror(errno));
2918        }
2919    } else {
2920#ifdef CONFIG_SOLARIS
2921        memcpy(buf, &ifr.ifr_addr.sa_data, ETHER_ADDR_LEN);
2922#else
2923        memcpy(buf, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
2924#endif
2925        *obtained = true;
2926    }
2927    close(sock);
2928    return true;
2929}
2930#endif /* CONFIG_BSD */
2931
2932/*
2933 * Build information about guest interfaces
2934 */
2935GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
2936{
2937    GuestNetworkInterfaceList *head = NULL, **tail = &head;
2938    struct ifaddrs *ifap, *ifa;
2939
2940    if (getifaddrs(&ifap) < 0) {
2941        error_setg_errno(errp, errno, "getifaddrs failed");
2942        goto error;
2943    }
2944
2945    for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
2946        GuestNetworkInterface *info;
2947        GuestIpAddressList **address_tail;
2948        GuestIpAddress *address_item = NULL;
2949        GuestNetworkInterfaceStat *interface_stat = NULL;
2950        char addr4[INET_ADDRSTRLEN];
2951        char addr6[INET6_ADDRSTRLEN];
2952        unsigned char mac_addr[ETHER_ADDR_LEN];
2953        bool obtained;
2954        void *p;
2955
2956        g_debug("Processing %s interface", ifa->ifa_name);
2957
2958        info = guest_find_interface(head, ifa->ifa_name);
2959
2960        if (!info) {
2961            info = g_malloc0(sizeof(*info));
2962            info->name = g_strdup(ifa->ifa_name);
2963
2964            QAPI_LIST_APPEND(tail, info);
2965        }
2966
2967        if (!info->hardware_address) {
2968            if (!guest_get_hw_addr(ifa, mac_addr, &obtained, errp)) {
2969                goto error;
2970            }
2971            if (obtained) {
2972                info->hardware_address =
2973                    g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
2974                                    (int) mac_addr[0], (int) mac_addr[1],
2975                                    (int) mac_addr[2], (int) mac_addr[3],
2976                                    (int) mac_addr[4], (int) mac_addr[5]);
2977            }
2978        }
2979
2980        if (ifa->ifa_addr &&
2981            ifa->ifa_addr->sa_family == AF_INET) {
2982            /* interface with IPv4 address */
2983            p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
2984            if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
2985                error_setg_errno(errp, errno, "inet_ntop failed");
2986                goto error;
2987            }
2988
2989            address_item = g_malloc0(sizeof(*address_item));
2990            address_item->ip_address = g_strdup(addr4);
2991            address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
2992
2993            if (ifa->ifa_netmask) {
2994                /* Count the number of set bits in netmask.
2995                 * This is safe as '1' and '0' cannot be shuffled in netmask. */
2996                p = &((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr;
2997                address_item->prefix = ctpop32(((uint32_t *) p)[0]);
2998            }
2999        } else if (ifa->ifa_addr &&
3000                   ifa->ifa_addr->sa_family == AF_INET6) {
3001            /* interface with IPv6 address */
3002            p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
3003            if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
3004                error_setg_errno(errp, errno, "inet_ntop failed");
3005                goto error;
3006            }
3007
3008            address_item = g_malloc0(sizeof(*address_item));
3009            address_item->ip_address = g_strdup(addr6);
3010            address_item->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
3011
3012            if (ifa->ifa_netmask) {
3013                /* Count the number of set bits in netmask.
3014                 * This is safe as '1' and '0' cannot be shuffled in netmask. */
3015                p = &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr;
3016                address_item->prefix =
3017                    ctpop32(((uint32_t *) p)[0]) +
3018                    ctpop32(((uint32_t *) p)[1]) +
3019                    ctpop32(((uint32_t *) p)[2]) +
3020                    ctpop32(((uint32_t *) p)[3]);
3021            }
3022        }
3023
3024        if (!address_item) {
3025            continue;
3026        }
3027
3028        address_tail = &info->ip_addresses;
3029        while (*address_tail) {
3030            address_tail = &(*address_tail)->next;
3031        }
3032        QAPI_LIST_APPEND(address_tail, address_item);
3033
3034        info->has_ip_addresses = true;
3035
3036        if (!info->statistics) {
3037            interface_stat = g_malloc0(sizeof(*interface_stat));
3038            if (guest_get_network_stats(info->name, interface_stat) == -1) {
3039                g_free(interface_stat);
3040            } else {
3041                info->statistics = interface_stat;
3042            }
3043        }
3044    }
3045
3046    freeifaddrs(ifap);
3047    return head;
3048
3049error:
3050    freeifaddrs(ifap);
3051    qapi_free_GuestNetworkInterfaceList(head);
3052    return NULL;
3053}
3054
3055#else
3056
3057GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
3058{
3059    error_setg(errp, QERR_UNSUPPORTED);
3060    return NULL;
3061}
3062
3063#endif /* HAVE_GETIFADDRS */
3064
3065#if !defined(CONFIG_FSFREEZE)
3066
3067GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
3068{
3069    error_setg(errp, QERR_UNSUPPORTED);
3070    return NULL;
3071}
3072
3073GuestFsfreezeStatus qmp_guest_fsfreeze_status(Error **errp)
3074{
3075    error_setg(errp, QERR_UNSUPPORTED);
3076
3077    return 0;
3078}
3079
3080int64_t qmp_guest_fsfreeze_freeze(Error **errp)
3081{
3082    error_setg(errp, QERR_UNSUPPORTED);
3083
3084    return 0;
3085}
3086
3087int64_t qmp_guest_fsfreeze_freeze_list(bool has_mountpoints,
3088                                       strList *mountpoints,
3089                                       Error **errp)
3090{
3091    error_setg(errp, QERR_UNSUPPORTED);
3092
3093    return 0;
3094}
3095
3096int64_t qmp_guest_fsfreeze_thaw(Error **errp)
3097{
3098    error_setg(errp, QERR_UNSUPPORTED);
3099
3100    return 0;
3101}
3102
3103GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
3104{
3105    error_setg(errp, QERR_UNSUPPORTED);
3106    return NULL;
3107}
3108
3109GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
3110{
3111    error_setg(errp, QERR_UNSUPPORTED);
3112    return NULL;
3113}
3114
3115GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
3116{
3117    error_setg(errp, QERR_UNSUPPORTED);
3118    return NULL;
3119}
3120
3121#endif /* CONFIG_FSFREEZE */
3122
3123#if !defined(CONFIG_FSTRIM)
3124GuestFilesystemTrimResponse *
3125qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
3126{
3127    error_setg(errp, QERR_UNSUPPORTED);
3128    return NULL;
3129}
3130#endif
3131
3132/* add unsupported commands to the list of blocked RPCs */
3133GList *ga_command_init_blockedrpcs(GList *blockedrpcs)
3134{
3135#if !defined(__linux__)
3136    {
3137        const char *list[] = {
3138            "guest-suspend-disk", "guest-suspend-ram",
3139            "guest-suspend-hybrid", "guest-get-vcpus", "guest-set-vcpus",
3140            "guest-get-memory-blocks", "guest-set-memory-blocks",
3141            "guest-get-memory-block-size", "guest-get-memory-block-info",
3142            NULL};
3143        char **p = (char **)list;
3144
3145        while (*p) {
3146            blockedrpcs = g_list_append(blockedrpcs, g_strdup(*p++));
3147        }
3148    }
3149#endif
3150
3151#if !defined(HAVE_GETIFADDRS)
3152    blockedrpcs = g_list_append(blockedrpcs,
3153                              g_strdup("guest-network-get-interfaces"));
3154#endif
3155
3156#if !defined(CONFIG_FSFREEZE)
3157    {
3158        const char *list[] = {
3159            "guest-get-fsinfo", "guest-fsfreeze-status",
3160            "guest-fsfreeze-freeze", "guest-fsfreeze-freeze-list",
3161            "guest-fsfreeze-thaw", "guest-get-fsinfo",
3162            "guest-get-disks", NULL};
3163        char **p = (char **)list;
3164
3165        while (*p) {
3166            blockedrpcs = g_list_append(blockedrpcs, g_strdup(*p++));
3167        }
3168    }
3169#endif
3170
3171#if !defined(CONFIG_FSTRIM)
3172    blockedrpcs = g_list_append(blockedrpcs, g_strdup("guest-fstrim"));
3173#endif
3174
3175    blockedrpcs = g_list_append(blockedrpcs, g_strdup("guest-get-devices"));
3176
3177    return blockedrpcs;
3178}
3179
3180/* register init/cleanup routines for stateful command groups */
3181void ga_command_state_init(GAState *s, GACommandState *cs)
3182{
3183#if defined(CONFIG_FSFREEZE)
3184    ga_command_state_add(cs, NULL, guest_fsfreeze_cleanup);
3185#endif
3186}
3187
3188#ifdef HAVE_UTMPX
3189
/* Number of microseconds in one second. */
#define QGA_MICRO_SECOND_TO_SECOND 1000000

/*
 * Convert the ut_tv timestamp of a utmpx record into seconds, keeping
 * the microsecond part as the fraction.
 */
static double ga_get_login_time(struct utmpx *user_info)
{
    double fraction = (double)user_info->ut_tv.tv_usec;

    fraction /= QGA_MICRO_SECOND_TO_SECOND;
    return (double)user_info->ut_tv.tv_sec + fraction;
}
3199
3200GuestUserList *qmp_guest_get_users(Error **errp)
3201{
3202    GHashTable *cache = NULL;
3203    GuestUserList *head = NULL, **tail = &head;
3204    struct utmpx *user_info = NULL;
3205    gpointer value = NULL;
3206    GuestUser *user = NULL;
3207    double login_time = 0;
3208
3209    cache = g_hash_table_new(g_str_hash, g_str_equal);
3210    setutxent();
3211
3212    for (;;) {
3213        user_info = getutxent();
3214        if (user_info == NULL) {
3215            break;
3216        } else if (user_info->ut_type != USER_PROCESS) {
3217            continue;
3218        } else if (g_hash_table_contains(cache, user_info->ut_user)) {
3219            value = g_hash_table_lookup(cache, user_info->ut_user);
3220            user = (GuestUser *)value;
3221            login_time = ga_get_login_time(user_info);
3222            /* We're ensuring the earliest login time to be sent */
3223            if (login_time < user->login_time) {
3224                user->login_time = login_time;
3225            }
3226            continue;
3227        }
3228
3229        user = g_new0(GuestUser, 1);
3230        user->user = g_strdup(user_info->ut_user);
3231        user->login_time = ga_get_login_time(user_info);
3232
3233        g_hash_table_insert(cache, user->user, user);
3234
3235        QAPI_LIST_APPEND(tail, user);
3236    }
3237    endutxent();
3238    g_hash_table_destroy(cache);
3239    return head;
3240}
3241
3242#else
3243
3244GuestUserList *qmp_guest_get_users(Error **errp)
3245{
3246    error_setg(errp, QERR_UNSUPPORTED);
3247    return NULL;
3248}
3249
3250#endif
3251
/*
 * Replace escaped special characters with their real values. The
 * replacement is done in place -- the result is stored in the original
 * string, which never grows.
 *
 * An unquoted value is cut at the first space or semicolon. A value that
 * starts with a single or double quote has the quotes removed and the
 * escapes \$ \' \" \\ \` resolved; any other backslash is kept literally.
 * A quoted value that lacks its closing quote is taken up to its end.
 */
static void ga_osrelease_replace_special(char *value)
{
    char *p, *p2, quote;

    /* Trim the string at first space or semicolon if it is not enclosed in
     * single or double quotes. */
    if (value[0] != '"' && value[0] != '\'') {
        p = strchr(value, ' ');
        if (p != NULL) {
            *p = 0;
        }
        p = strchr(value, ';');
        if (p != NULL) {
            *p = 0;
        }
        return;
    }

    quote = value[0];
    p2 = value;       /* write position */
    p = value + 1;    /* read position, just past the opening quote */
    while (*p != 0) {
        if (*p == '\\') {
            p++;
            switch (*p) {
            case '$':
            case '\'':
            case '"':
            case '\\':
            case '`':
                /* Drop the backslash, copy the escaped character below */
                break;
            default:
                /* Keep literal backslash followed by whatever is there */
                p--;
                break;
            }
        } else if (*p == quote) {
            /* Unescaped closing quote: the value ends here */
            break;
        }
        *(p2++) = *(p++);
    }

    /* Terminate the compacted string, closing quote present or not. */
    *p2 = 0;
}
3298
3299static GKeyFile *ga_parse_osrelease(const char *fname)
3300{
3301    gchar *content = NULL;
3302    gchar *content2 = NULL;
3303    GError *err = NULL;
3304    GKeyFile *keys = g_key_file_new();
3305    const char *group = "[os-release]\n";
3306
3307    if (!g_file_get_contents(fname, &content, NULL, &err)) {
3308        slog("failed to read '%s', error: %s", fname, err->message);
3309        goto fail;
3310    }
3311
3312    if (!g_utf8_validate(content, -1, NULL)) {
3313        slog("file is not utf-8 encoded: %s", fname);
3314        goto fail;
3315    }
3316    content2 = g_strdup_printf("%s%s", group, content);
3317
3318    if (!g_key_file_load_from_data(keys, content2, -1, G_KEY_FILE_NONE,
3319                                   &err)) {
3320        slog("failed to parse file '%s', error: %s", fname, err->message);
3321        goto fail;
3322    }
3323
3324    g_free(content);
3325    g_free(content2);
3326    return keys;
3327
3328fail:
3329    g_error_free(err);
3330    g_free(content);
3331    g_free(content2);
3332    g_key_file_free(keys);
3333    return NULL;
3334}
3335
3336GuestOSInfo *qmp_guest_get_osinfo(Error **errp)
3337{
3338    GuestOSInfo *info = NULL;
3339    struct utsname kinfo;
3340    GKeyFile *osrelease = NULL;
3341    const char *qga_os_release = g_getenv("QGA_OS_RELEASE");
3342
3343    info = g_new0(GuestOSInfo, 1);
3344
3345    if (uname(&kinfo) != 0) {
3346        error_setg_errno(errp, errno, "uname failed");
3347    } else {
3348        info->kernel_version = g_strdup(kinfo.version);
3349        info->kernel_release = g_strdup(kinfo.release);
3350        info->machine = g_strdup(kinfo.machine);
3351    }
3352
3353    if (qga_os_release != NULL) {
3354        osrelease = ga_parse_osrelease(qga_os_release);
3355    } else {
3356        osrelease = ga_parse_osrelease("/etc/os-release");
3357        if (osrelease == NULL) {
3358            osrelease = ga_parse_osrelease("/usr/lib/os-release");
3359        }
3360    }
3361
3362    if (osrelease != NULL) {
3363        char *value;
3364
3365#define GET_FIELD(field, osfield) do { \
3366    value = g_key_file_get_value(osrelease, "os-release", osfield, NULL); \
3367    if (value != NULL) { \
3368        ga_osrelease_replace_special(value); \
3369        info->field = value; \
3370    } \
3371} while (0)
3372        GET_FIELD(id, "ID");
3373        GET_FIELD(name, "NAME");
3374        GET_FIELD(pretty_name, "PRETTY_NAME");
3375        GET_FIELD(version, "VERSION");
3376        GET_FIELD(version_id, "VERSION_ID");
3377        GET_FIELD(variant, "VARIANT");
3378        GET_FIELD(variant_id, "VARIANT_ID");
3379#undef GET_FIELD
3380
3381        g_key_file_free(osrelease);
3382    }
3383
3384    return info;
3385}
3386
3387GuestDeviceInfoList *qmp_guest_get_devices(Error **errp)
3388{
3389    error_setg(errp, QERR_UNSUPPORTED);
3390
3391    return NULL;
3392}
3393
3394#ifndef HOST_NAME_MAX
3395# ifdef _POSIX_HOST_NAME_MAX
3396#  define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
3397# else
3398#  define HOST_NAME_MAX 255
3399# endif
3400#endif
3401
3402char *qga_get_host_name(Error **errp)
3403{
3404    long len = -1;
3405    g_autofree char *hostname = NULL;
3406
3407#ifdef _SC_HOST_NAME_MAX
3408    len = sysconf(_SC_HOST_NAME_MAX);
3409#endif /* _SC_HOST_NAME_MAX */
3410
3411    if (len < 0) {
3412        len = HOST_NAME_MAX;
3413    }
3414
3415    /* Unfortunately, gethostname() below does not guarantee a
3416     * NULL terminated string. Therefore, allocate one byte more
3417     * to be sure. */
3418    hostname = g_new0(char, len + 1);
3419
3420    if (gethostname(hostname, len) < 0) {
3421        error_setg_errno(errp, errno,
3422                         "cannot get hostname");
3423        return NULL;
3424    }
3425
3426    return g_steal_pointer(&hostname);
3427}
3428