qemu/util/userfaultfd.c
<<
>>
Prefs
   1/*
   2 * Linux UFFD-WP support
   3 *
   4 * Copyright Virtuozzo GmbH, 2020
   5 *
   6 * Authors:
   7 *  Andrey Gruzdev   <andrey.gruzdev@virtuozzo.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or
  10 * later.  See the COPYING file in the top-level directory.
  11 */
  12
  13#include "qemu/osdep.h"
  14#include "qemu/bitops.h"
  15#include "qemu/error-report.h"
  16#include "qemu/userfaultfd.h"
  17#include "trace.h"
  18#include <poll.h>
  19#include <sys/syscall.h>
  20#include <sys/ioctl.h>
  21
  22/**
  23 * uffd_query_features: query UFFD features
  24 *
  25 * Returns: 0 on success, negative value in case of an error
  26 *
  27 * @features: parameter to receive 'uffdio_api.features'
  28 */
  29int uffd_query_features(uint64_t *features)
  30{
  31    int uffd_fd;
  32    struct uffdio_api api_struct = { 0 };
  33    int ret = -1;
  34
  35    uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC);
  36    if (uffd_fd < 0) {
  37        trace_uffd_query_features_nosys(errno);
  38        return -1;
  39    }
  40
  41    api_struct.api = UFFD_API;
  42    api_struct.features = 0;
  43
  44    if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
  45        trace_uffd_query_features_api_failed(errno);
  46        goto out;
  47    }
  48    *features = api_struct.features;
  49    ret = 0;
  50
  51out:
  52    close(uffd_fd);
  53    return ret;
  54}
  55
  56/**
  57 * uffd_create_fd: create UFFD file descriptor
  58 *
  59 * Returns non-negative file descriptor or negative value in case of an error
  60 *
  61 * @features: UFFD features to request
  62 * @non_blocking: create UFFD file descriptor for non-blocking operation
  63 */
  64int uffd_create_fd(uint64_t features, bool non_blocking)
  65{
  66    int uffd_fd;
  67    int flags;
  68    struct uffdio_api api_struct = { 0 };
  69    uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
  70
  71    flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
  72    uffd_fd = syscall(__NR_userfaultfd, flags);
  73    if (uffd_fd < 0) {
  74        trace_uffd_create_fd_nosys(errno);
  75        return -1;
  76    }
  77
  78    api_struct.api = UFFD_API;
  79    api_struct.features = features;
  80    if (ioctl(uffd_fd, UFFDIO_API, &api_struct)) {
  81        trace_uffd_create_fd_api_failed(errno);
  82        goto fail;
  83    }
  84    if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
  85        trace_uffd_create_fd_api_noioctl(ioctl_mask, api_struct.ioctls);
  86        goto fail;
  87    }
  88
  89    return uffd_fd;
  90
  91fail:
  92    close(uffd_fd);
  93    return -1;
  94}
  95
  96/**
  97 * uffd_close_fd: close UFFD file descriptor
  98 *
  99 * @uffd_fd: UFFD file descriptor
 100 */
 101void uffd_close_fd(int uffd_fd)
 102{
 103    assert(uffd_fd >= 0);
 104    close(uffd_fd);
 105}
 106
 107/**
 108 * uffd_register_memory: register memory range via UFFD-IO
 109 *
 110 * Returns 0 in case of success, negative value in case of an error
 111 *
 112 * @uffd_fd: UFFD file descriptor
 113 * @addr: base address of memory range
 114 * @length: length of memory range
 115 * @mode: UFFD register mode (UFFDIO_REGISTER_MODE_MISSING, ...)
 116 * @ioctls: optional pointer to receive supported IOCTL mask
 117 */
 118int uffd_register_memory(int uffd_fd, void *addr, uint64_t length,
 119        uint64_t mode, uint64_t *ioctls)
 120{
 121    struct uffdio_register uffd_register;
 122
 123    uffd_register.range.start = (uintptr_t) addr;
 124    uffd_register.range.len = length;
 125    uffd_register.mode = mode;
 126
 127    if (ioctl(uffd_fd, UFFDIO_REGISTER, &uffd_register)) {
 128        trace_uffd_register_memory_failed(addr, length, mode, errno);
 129        return -1;
 130    }
 131    if (ioctls) {
 132        *ioctls = uffd_register.ioctls;
 133    }
 134
 135    return 0;
 136}
 137
 138/**
 139 * uffd_unregister_memory: un-register memory range with UFFD-IO
 140 *
 141 * Returns 0 in case of success, negative value in case of an error
 142 *
 143 * @uffd_fd: UFFD file descriptor
 144 * @addr: base address of memory range
 145 * @length: length of memory range
 146 */
 147int uffd_unregister_memory(int uffd_fd, void *addr, uint64_t length)
 148{
 149    struct uffdio_range uffd_range;
 150
 151    uffd_range.start = (uintptr_t) addr;
 152    uffd_range.len = length;
 153
 154    if (ioctl(uffd_fd, UFFDIO_UNREGISTER, &uffd_range)) {
 155        trace_uffd_unregister_memory_failed(addr, length, errno);
 156        return -1;
 157    }
 158
 159    return 0;
 160}
 161
 162/**
 163 * uffd_change_protection: protect/un-protect memory range for writes via UFFD-IO
 164 *
 165 * Returns 0 on success, negative value in case of error
 166 *
 167 * @uffd_fd: UFFD file descriptor
 168 * @addr: base address of memory range
 169 * @length: length of memory range
 170 * @wp: write-protect/unprotect
 171 * @dont_wake: do not wake threads waiting on wr-protected page
 172 */
 173int uffd_change_protection(int uffd_fd, void *addr, uint64_t length,
 174        bool wp, bool dont_wake)
 175{
 176    struct uffdio_writeprotect uffd_writeprotect;
 177
 178    uffd_writeprotect.range.start = (uintptr_t) addr;
 179    uffd_writeprotect.range.len = length;
 180    if (!wp && dont_wake) {
 181        /* DONTWAKE is meaningful only on protection release */
 182        uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_DONTWAKE;
 183    } else {
 184        uffd_writeprotect.mode = (wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0);
 185    }
 186
 187    if (ioctl(uffd_fd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
 188        error_report("uffd_change_protection() failed: addr=%p len=%" PRIu64
 189                " mode=%" PRIx64 " errno=%i", addr, length,
 190                (uint64_t) uffd_writeprotect.mode, errno);
 191        return -1;
 192    }
 193
 194    return 0;
 195}
 196
 197/**
 198 * uffd_copy_page: copy range of pages to destination via UFFD-IO
 199 *
 200 * Copy range of source pages to the destination to resolve
 201 * missing page fault somewhere in the destination range.
 202 *
 203 * Returns 0 on success, negative value in case of an error
 204 *
 205 * @uffd_fd: UFFD file descriptor
 206 * @dst_addr: destination base address
 207 * @src_addr: source base address
 208 * @length: length of the range to copy
 209 * @dont_wake: do not wake threads waiting on missing page
 210 */
 211int uffd_copy_page(int uffd_fd, void *dst_addr, void *src_addr,
 212        uint64_t length, bool dont_wake)
 213{
 214    struct uffdio_copy uffd_copy;
 215
 216    uffd_copy.dst = (uintptr_t) dst_addr;
 217    uffd_copy.src = (uintptr_t) src_addr;
 218    uffd_copy.len = length;
 219    uffd_copy.mode = dont_wake ? UFFDIO_COPY_MODE_DONTWAKE : 0;
 220
 221    if (ioctl(uffd_fd, UFFDIO_COPY, &uffd_copy)) {
 222        error_report("uffd_copy_page() failed: dst_addr=%p src_addr=%p length=%" PRIu64
 223                " mode=%" PRIx64 " errno=%i", dst_addr, src_addr,
 224                length, (uint64_t) uffd_copy.mode, errno);
 225        return -1;
 226    }
 227
 228    return 0;
 229}
 230
 231/**
 232 * uffd_zero_page: fill range of pages with zeroes via UFFD-IO
 233 *
 234 * Fill range pages with zeroes to resolve missing page fault within the range.
 235 *
 236 * Returns 0 on success, negative value in case of an error
 237 *
 238 * @uffd_fd: UFFD file descriptor
 239 * @addr: base address
 240 * @length: length of the range to fill with zeroes
 241 * @dont_wake: do not wake threads waiting on missing page
 242 */
 243int uffd_zero_page(int uffd_fd, void *addr, uint64_t length, bool dont_wake)
 244{
 245    struct uffdio_zeropage uffd_zeropage;
 246
 247    uffd_zeropage.range.start = (uintptr_t) addr;
 248    uffd_zeropage.range.len = length;
 249    uffd_zeropage.mode = dont_wake ? UFFDIO_ZEROPAGE_MODE_DONTWAKE : 0;
 250
 251    if (ioctl(uffd_fd, UFFDIO_ZEROPAGE, &uffd_zeropage)) {
 252        error_report("uffd_zero_page() failed: addr=%p length=%" PRIu64
 253                " mode=%" PRIx64 " errno=%i", addr, length,
 254                (uint64_t) uffd_zeropage.mode, errno);
 255        return -1;
 256    }
 257
 258    return 0;
 259}
 260
 261/**
 262 * uffd_wakeup: wake up threads waiting on page UFFD-managed page fault resolution
 263 *
 264 * Wake up threads waiting on any page/pages from the designated range.
 265 * The main use case is when during some period, page faults are resolved
 266 * via UFFD-IO IOCTLs with MODE_DONTWAKE flag set, then after that all waits
 267 * for the whole memory range are satisfied in a single call to uffd_wakeup().
 268 *
 269 * Returns 0 on success, negative value in case of an error
 270 *
 271 * @uffd_fd: UFFD file descriptor
 272 * @addr: base address
 273 * @length: length of the range
 274 */
 275int uffd_wakeup(int uffd_fd, void *addr, uint64_t length)
 276{
 277    struct uffdio_range uffd_range;
 278
 279    uffd_range.start = (uintptr_t) addr;
 280    uffd_range.len = length;
 281
 282    if (ioctl(uffd_fd, UFFDIO_WAKE, &uffd_range)) {
 283        error_report("uffd_wakeup() failed: addr=%p length=%" PRIu64 " errno=%i",
 284                addr, length, errno);
 285        return -1;
 286    }
 287
 288    return 0;
 289}
 290
 291/**
 292 * uffd_read_events: read pending UFFD events
 293 *
 294 * Returns number of fetched messages, 0 if non is available or
 295 * negative value in case of an error
 296 *
 297 * @uffd_fd: UFFD file descriptor
 298 * @msgs: pointer to message buffer
 299 * @count: number of messages that can fit in the buffer
 300 */
 301int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count)
 302{
 303    ssize_t res;
 304    do {
 305        res = read(uffd_fd, msgs, count * sizeof(struct uffd_msg));
 306    } while (res < 0 && errno == EINTR);
 307
 308    if ((res < 0 && errno == EAGAIN)) {
 309        return 0;
 310    }
 311    if (res < 0) {
 312        error_report("uffd_read_events() failed: errno=%i", errno);
 313        return -1;
 314    }
 315
 316    return (int) (res / sizeof(struct uffd_msg));
 317}
 318
 319/**
 320 * uffd_poll_events: poll UFFD file descriptor for read
 321 *
 322 * Returns true if events are available for read, false otherwise
 323 *
 324 * @uffd_fd: UFFD file descriptor
 325 * @tmo: timeout value
 326 */
 327bool uffd_poll_events(int uffd_fd, int tmo)
 328{
 329    int res;
 330    struct pollfd poll_fd = { .fd = uffd_fd, .events = POLLIN, .revents = 0 };
 331
 332    do {
 333        res = poll(&poll_fd, 1, tmo);
 334    } while (res < 0 && errno == EINTR);
 335
 336    if (res == 0) {
 337        return false;
 338    }
 339    if (res < 0) {
 340        error_report("uffd_poll_events() failed: errno=%i", errno);
 341        return false;
 342    }
 343
 344    return (poll_fd.revents & POLLIN) != 0;
 345}
 346