linux/mm/secretmem.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright IBM Corporation, 2021
   4 *
   5 * Author: Mike Rapoport <rppt@linux.ibm.com>
   6 */
   7
   8#include <linux/mm.h>
   9#include <linux/fs.h>
  10#include <linux/swap.h>
  11#include <linux/mount.h>
  12#include <linux/memfd.h>
  13#include <linux/bitops.h>
  14#include <linux/printk.h>
  15#include <linux/pagemap.h>
  16#include <linux/syscalls.h>
  17#include <linux/pseudo_fs.h>
  18#include <linux/secretmem.h>
  19#include <linux/set_memory.h>
  20#include <linux/sched/signal.h>
  21
  22#include <uapi/linux/magic.h>
  23
  24#include <asm/tlbflush.h>
  25
  26#include "internal.h"
  27
  28#undef pr_fmt
  29#define pr_fmt(fmt) "secretmem: " fmt
  30
/*
 * Define mode and flag masks to allow validation of the system call
 * parameters.
 *
 * Both masks are currently zero, so the flags check in memfd_secret()
 * lets through nothing except O_CLOEXEC.
 */
#define SECRETMEM_MODE_MASK     (0x0)
#define SECRETMEM_FLAGS_MASK    SECRETMEM_MODE_MASK
  37
  38static bool secretmem_enable __ro_after_init = 1;
  39module_param_named(enable, secretmem_enable, bool, 0400);
  40MODULE_PARM_DESC(secretmem_enable,
  41                 "Enable secretmem and memfd_secret(2) system call");
  42
/*
 * Count of secretmem files: incremented in secretmem_file_create() and
 * decremented in secretmem_release().
 */
static atomic_t secretmem_users;
  44
  45bool secretmem_active(void)
  46{
  47        return !!atomic_read(&secretmem_users);
  48}
  49
  50static vm_fault_t secretmem_fault(struct vm_fault *vmf)
  51{
  52        struct address_space *mapping = vmf->vma->vm_file->f_mapping;
  53        struct inode *inode = file_inode(vmf->vma->vm_file);
  54        pgoff_t offset = vmf->pgoff;
  55        gfp_t gfp = vmf->gfp_mask;
  56        unsigned long addr;
  57        struct folio *folio;
  58        vm_fault_t ret;
  59        int err;
  60
  61        if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
  62                return vmf_error(-EINVAL);
  63
  64        filemap_invalidate_lock_shared(mapping);
  65
  66retry:
  67        folio = filemap_lock_folio(mapping, offset);
  68        if (IS_ERR(folio)) {
  69                folio = folio_alloc(gfp | __GFP_ZERO, 0);
  70                if (!folio) {
  71                        ret = VM_FAULT_OOM;
  72                        goto out;
  73                }
  74
  75                err = set_direct_map_invalid_noflush(folio_page(folio, 0));
  76                if (err) {
  77                        folio_put(folio);
  78                        ret = vmf_error(err);
  79                        goto out;
  80                }
  81
  82                __folio_mark_uptodate(folio);
  83                err = filemap_add_folio(mapping, folio, offset, gfp);
  84                if (unlikely(err)) {
  85                        folio_put(folio);
  86                        /*
  87                         * If a split of large page was required, it
  88                         * already happened when we marked the page invalid
  89                         * which guarantees that this call won't fail
  90                         */
  91                        set_direct_map_default_noflush(folio_page(folio, 0));
  92                        if (err == -EEXIST)
  93                                goto retry;
  94
  95                        ret = vmf_error(err);
  96                        goto out;
  97                }
  98
  99                addr = (unsigned long)folio_address(folio);
 100                flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 101        }
 102
 103        vmf->page = folio_file_page(folio, vmf->pgoff);
 104        ret = VM_FAULT_LOCKED;
 105
 106out:
 107        filemap_invalidate_unlock_shared(mapping);
 108        return ret;
 109}
 110
/*
 * VMA operations installed by secretmem_mmap_prepare(); the address of
 * this table is also what vma_is_secretmem() compares against.
 */
static const struct vm_operations_struct secretmem_vm_ops = {
        .fault = secretmem_fault,
};
 114
/*
 * ->release() for secretmem files: undo the secretmem_users increment
 * made in secretmem_file_create().  Always returns 0.
 */
static int secretmem_release(struct inode *inode, struct file *file)
{
        atomic_dec(&secretmem_users);
        return 0;
}
 120
 121static int secretmem_mmap_prepare(struct vm_area_desc *desc)
 122{
 123        const unsigned long len = desc->end - desc->start;
 124
 125        if ((desc->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
 126                return -EINVAL;
 127
 128        if (!mlock_future_ok(desc->mm, desc->vm_flags | VM_LOCKED, len))
 129                return -EAGAIN;
 130
 131        desc->vm_flags |= VM_LOCKED | VM_DONTDUMP;
 132        desc->vm_ops = &secretmem_vm_ops;
 133
 134        return 0;
 135}
 136
 137bool vma_is_secretmem(struct vm_area_struct *vma)
 138{
 139        return vma->vm_ops == &secretmem_vm_ops;
 140}
 141
/* File operations handed to alloc_file_pseudo() for every secretmem file. */
static const struct file_operations secretmem_fops = {
        .release        = secretmem_release,
        .mmap_prepare   = secretmem_mmap_prepare,
};
 146
/*
 * ->migrate_folio() for secretmem: unconditionally refuses with -EBUSY,
 * whatever folios and mode are passed in.
 */
static int secretmem_migrate_folio(struct address_space *mapping,
                struct folio *dst, struct folio *src, enum migrate_mode mode)
{
        return -EBUSY;
}
 152
 153static void secretmem_free_folio(struct folio *folio)
 154{
 155        set_direct_map_default_noflush(folio_page(folio, 0));
 156        folio_zero_segment(folio, 0, folio_size(folio));
 157}
 158
/*
 * Address space operations installed on every secretmem inode by
 * secretmem_file_create().  The table is not static, so it is visible
 * outside this file.
 */
const struct address_space_operations secretmem_aops = {
        .dirty_folio    = noop_dirty_folio,
        .free_folio     = secretmem_free_folio,
        .migrate_folio  = secretmem_migrate_folio,
};
 164
 165static int secretmem_setattr(struct mnt_idmap *idmap,
 166                             struct dentry *dentry, struct iattr *iattr)
 167{
 168        struct inode *inode = d_inode(dentry);
 169        struct address_space *mapping = inode->i_mapping;
 170        unsigned int ia_valid = iattr->ia_valid;
 171        int ret;
 172
 173        filemap_invalidate_lock(mapping);
 174
 175        if ((ia_valid & ATTR_SIZE) && inode->i_size)
 176                ret = -EINVAL;
 177        else
 178                ret = simple_setattr(idmap, dentry, iattr);
 179
 180        filemap_invalidate_unlock(mapping);
 181
 182        return ret;
 183}
 184
/* Inode operations installed on secretmem inodes by secretmem_file_create(). */
static const struct inode_operations secretmem_iops = {
        .setattr = secretmem_setattr,
};
 188
/*
 * Internal mount of the secretmem pseudo filesystem, assigned by
 * secretmem_init() and used by secretmem_file_create().
 */
static struct vfsmount *secretmem_mnt;
 190
 191static struct file *secretmem_file_create(unsigned long flags)
 192{
 193        struct file *file;
 194        struct inode *inode;
 195        const char *anon_name = "[secretmem]";
 196
 197        inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL);
 198        if (IS_ERR(inode))
 199                return ERR_CAST(inode);
 200
 201        file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
 202                                 O_RDWR | O_LARGEFILE, &secretmem_fops);
 203        if (IS_ERR(file))
 204                goto err_free_inode;
 205
 206        mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
 207        mapping_set_unevictable(inode->i_mapping);
 208
 209        inode->i_op = &secretmem_iops;
 210        inode->i_mapping->a_ops = &secretmem_aops;
 211
 212        /* pretend we are a normal file with zero size */
 213        inode->i_mode |= S_IFREG;
 214        inode->i_size = 0;
 215
 216        atomic_inc(&secretmem_users);
 217
 218        return file;
 219
 220err_free_inode:
 221        iput(inode);
 222        return file;
 223}
 224
 225SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
 226{
 227        struct file *file;
 228        int fd, err;
 229
 230        /* make sure local flags do not confict with global fcntl.h */
 231        BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
 232
 233        if (!secretmem_enable || !can_set_direct_map())
 234                return -ENOSYS;
 235
 236        if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
 237                return -EINVAL;
 238        if (atomic_read(&secretmem_users) < 0)
 239                return -ENFILE;
 240
 241        fd = get_unused_fd_flags(flags & O_CLOEXEC);
 242        if (fd < 0)
 243                return fd;
 244
 245        file = secretmem_file_create(flags);
 246        if (IS_ERR(file)) {
 247                err = PTR_ERR(file);
 248                goto err_put_fd;
 249        }
 250
 251        fd_install(fd, file);
 252        return fd;
 253
 254err_put_fd:
 255        put_unused_fd(fd);
 256        return err;
 257}
 258
 259static int secretmem_init_fs_context(struct fs_context *fc)
 260{
 261        struct pseudo_fs_context *ctx;
 262
 263        ctx = init_pseudo(fc, SECRETMEM_MAGIC);
 264        if (!ctx)
 265                return -ENOMEM;
 266
 267        fc->s_iflags |= SB_I_NOEXEC;
 268        fc->s_iflags |= SB_I_NODEV;
 269        return 0;
 270}
 271
/* Filesystem type mounted internally by secretmem_init() via kern_mount(). */
static struct file_system_type secretmem_fs = {
        .name           = "secretmem",
        .init_fs_context = secretmem_init_fs_context,
        .kill_sb        = kill_anon_super,
};
 277
 278static int __init secretmem_init(void)
 279{
 280        if (!secretmem_enable || !can_set_direct_map())
 281                return 0;
 282
 283        secretmem_mnt = kern_mount(&secretmem_fs);
 284        if (IS_ERR(secretmem_mnt))
 285                return PTR_ERR(secretmem_mnt);
 286
 287        return 0;
 288}
 289fs_initcall(secretmem_init);
 290