linux/tools/testing/selftests/memfd/fuse_test.c
<<
>>
Prefs
   1/*
   2 * memfd GUP test-case
   3 * This tests memfd interactions with get_user_pages(). We require the
   4 * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
   5 * file-system delays _all_ reads by 1s and forces direct-IO. This means, any
   6 * read() on files in that file-system will pin the receive-buffer pages for at
   7 * least 1s via get_user_pages().
   8 *
   9 * We use this trick to race ADD_SEALS against a write on a memfd object. The
  10 * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
  11 * the read() syscall with our memory-mapped memfd object as receive buffer to
  12 * force the kernel to write into our memfd object.
  13 */
  14
  15#define _GNU_SOURCE
  16#define __EXPORTED_HEADERS__
  17
  18#include <errno.h>
  19#include <inttypes.h>
  20#include <limits.h>
  21#include <linux/falloc.h>
  22#include <linux/fcntl.h>
  23#include <linux/memfd.h>
  24#include <sched.h>
  25#include <stdio.h>
  26#include <stdlib.h>
  27#include <signal.h>
  28#include <string.h>
  29#include <sys/mman.h>
  30#include <sys/stat.h>
  31#include <sys/syscall.h>
  32#include <sys/wait.h>
  33#include <unistd.h>
  34
  35#define MFD_DEF_SIZE 8192
  36#define STACK_SIZE 65535
  37
  38static int sys_memfd_create(const char *name,
  39                            unsigned int flags)
  40{
  41        return syscall(__NR_memfd_create, name, flags);
  42}
  43
  44static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
  45{
  46        int r, fd;
  47
  48        fd = sys_memfd_create(name, flags);
  49        if (fd < 0) {
  50                printf("memfd_create(\"%s\", %u) failed: %m\n",
  51                       name, flags);
  52                abort();
  53        }
  54
  55        r = ftruncate(fd, sz);
  56        if (r < 0) {
  57                printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
  58                abort();
  59        }
  60
  61        return fd;
  62}
  63
  64static __u64 mfd_assert_get_seals(int fd)
  65{
  66        long r;
  67
  68        r = fcntl(fd, F_GET_SEALS);
  69        if (r < 0) {
  70                printf("GET_SEALS(%d) failed: %m\n", fd);
  71                abort();
  72        }
  73
  74        return r;
  75}
  76
  77static void mfd_assert_has_seals(int fd, __u64 seals)
  78{
  79        __u64 s;
  80
  81        s = mfd_assert_get_seals(fd);
  82        if (s != seals) {
  83                printf("%llu != %llu = GET_SEALS(%d)\n",
  84                       (unsigned long long)seals, (unsigned long long)s, fd);
  85                abort();
  86        }
  87}
  88
  89static void mfd_assert_add_seals(int fd, __u64 seals)
  90{
  91        long r;
  92        __u64 s;
  93
  94        s = mfd_assert_get_seals(fd);
  95        r = fcntl(fd, F_ADD_SEALS, seals);
  96        if (r < 0) {
  97                printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
  98                       fd, (unsigned long long)s, (unsigned long long)seals);
  99                abort();
 100        }
 101}
 102
 103static int mfd_busy_add_seals(int fd, __u64 seals)
 104{
 105        long r;
 106        __u64 s;
 107
 108        r = fcntl(fd, F_GET_SEALS);
 109        if (r < 0)
 110                s = 0;
 111        else
 112                s = r;
 113
 114        r = fcntl(fd, F_ADD_SEALS, seals);
 115        if (r < 0 && errno != EBUSY) {
 116                printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
 117                       fd, (unsigned long long)s, (unsigned long long)seals);
 118                abort();
 119        }
 120
 121        return r;
 122}
 123
 124static void *mfd_assert_mmap_shared(int fd)
 125{
 126        void *p;
 127
 128        p = mmap(NULL,
 129                 MFD_DEF_SIZE,
 130                 PROT_READ | PROT_WRITE,
 131                 MAP_SHARED,
 132                 fd,
 133                 0);
 134        if (p == MAP_FAILED) {
 135                printf("mmap() failed: %m\n");
 136                abort();
 137        }
 138
 139        return p;
 140}
 141
 142static void *mfd_assert_mmap_private(int fd)
 143{
 144        void *p;
 145
 146        p = mmap(NULL,
 147                 MFD_DEF_SIZE,
 148                 PROT_READ | PROT_WRITE,
 149                 MAP_PRIVATE,
 150                 fd,
 151                 0);
 152        if (p == MAP_FAILED) {
 153                printf("mmap() failed: %m\n");
 154                abort();
 155        }
 156
 157        return p;
 158}
 159
 160static int global_mfd = -1;
 161static void *global_p = NULL;
 162
 163static int sealing_thread_fn(void *arg)
 164{
 165        int sig, r;
 166
 167        /*
 168         * This thread first waits 200ms so any pending operation in the parent
 169         * is correctly started. After that, it tries to seal @global_mfd as
 170         * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
 171         * that memory mapped object still ongoing.
 172         * We then wait one more second and try sealing again. This time it
 173         * must succeed as there shouldn't be anyone else pinning the pages.
 174         */
 175
 176        /* wait 200ms for FUSE-request to be active */
 177        usleep(200000);
 178
 179        /* unmount mapping before sealing to avoid i_mmap_writable failures */
 180        munmap(global_p, MFD_DEF_SIZE);
 181
 182        /* Try sealing the global file; expect EBUSY or success. Current
 183         * kernels will never succeed, but in the future, kernels might
 184         * implement page-replacements or other fancy ways to avoid racing
 185         * writes. */
 186        r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
 187        if (r >= 0) {
 188                printf("HURRAY! This kernel fixed GUP races!\n");
 189        } else {
 190                /* wait 1s more so the FUSE-request is done */
 191                sleep(1);
 192
 193                /* try sealing the global file again */
 194                mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
 195        }
 196
 197        return 0;
 198}
 199
 200static pid_t spawn_sealing_thread(void)
 201{
 202        uint8_t *stack;
 203        pid_t pid;
 204
 205        stack = malloc(STACK_SIZE);
 206        if (!stack) {
 207                printf("malloc(STACK_SIZE) failed: %m\n");
 208                abort();
 209        }
 210
 211        pid = clone(sealing_thread_fn,
 212                    stack + STACK_SIZE,
 213                    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
 214                    NULL);
 215        if (pid < 0) {
 216                printf("clone() failed: %m\n");
 217                abort();
 218        }
 219
 220        return pid;
 221}
 222
 223static void join_sealing_thread(pid_t pid)
 224{
 225        waitpid(pid, NULL, 0);
 226}
 227
 228int main(int argc, char **argv)
 229{
 230        static const char zero[MFD_DEF_SIZE];
 231        int fd, mfd, r;
 232        void *p;
 233        int was_sealed;
 234        pid_t pid;
 235
 236        if (argc < 2) {
 237                printf("error: please pass path to file in fuse_mnt mount-point\n");
 238                abort();
 239        }
 240
 241        /* open FUSE memfd file for GUP testing */
 242        printf("opening: %s\n", argv[1]);
 243        fd = open(argv[1], O_RDONLY | O_CLOEXEC);
 244        if (fd < 0) {
 245                printf("cannot open(\"%s\"): %m\n", argv[1]);
 246                abort();
 247        }
 248
 249        /* create new memfd-object */
 250        mfd = mfd_assert_new("kern_memfd_fuse",
 251                             MFD_DEF_SIZE,
 252                             MFD_CLOEXEC | MFD_ALLOW_SEALING);
 253
 254        /* mmap memfd-object for writing */
 255        p = mfd_assert_mmap_shared(mfd);
 256
 257        /* pass mfd+mapping to a separate sealing-thread which tries to seal
 258         * the memfd objects with SEAL_WRITE while we write into it */
 259        global_mfd = mfd;
 260        global_p = p;
 261        pid = spawn_sealing_thread();
 262
 263        /* Use read() on the FUSE file to read into our memory-mapped memfd
 264         * object. This races the other thread which tries to seal the
 265         * memfd-object.
 266         * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
 267         * This guarantees that the receive-buffer is pinned for 1s until the
 268         * data is written into it. The racing ADD_SEALS should thus fail as
 269         * the pages are still pinned. */
 270        r = read(fd, p, MFD_DEF_SIZE);
 271        if (r < 0) {
 272                printf("read() failed: %m\n");
 273                abort();
 274        } else if (!r) {
 275                printf("unexpected EOF on read()\n");
 276                abort();
 277        }
 278
 279        was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
 280
 281        /* Wait for sealing-thread to finish and verify that it
 282         * successfully sealed the file after the second try. */
 283        join_sealing_thread(pid);
 284        mfd_assert_has_seals(mfd, F_SEAL_WRITE);
 285
 286        /* *IF* the memfd-object was sealed at the time our read() returned,
 287         * then the kernel did a page-replacement or canceled the read() (or
 288         * whatever magic it did..). In that case, the memfd object is still
 289         * all zero.
 290         * In case the memfd-object was *not* sealed, the read() was successfull
 291         * and the memfd object must *not* be all zero.
 292         * Note that in real scenarios, there might be a mixture of both, but
 293         * in this test-cases, we have explicit 200ms delays which should be
 294         * enough to avoid any in-flight writes. */
 295
 296        p = mfd_assert_mmap_private(mfd);
 297        if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) {
 298                printf("memfd sealed during read() but data not discarded\n");
 299                abort();
 300        } else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) {
 301                printf("memfd sealed after read() but data discarded\n");
 302                abort();
 303        }
 304
 305        close(mfd);
 306        close(fd);
 307
 308        printf("fuse: DONE\n");
 309
 310        return 0;
 311}
 312