linux/arch/arm64/kernel/mte.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2020 ARM Ltd.
   4 */
   5
   6#include <linux/bitops.h>
   7#include <linux/cpu.h>
   8#include <linux/kernel.h>
   9#include <linux/mm.h>
  10#include <linux/prctl.h>
  11#include <linux/sched.h>
  12#include <linux/sched/mm.h>
  13#include <linux/string.h>
  14#include <linux/swap.h>
  15#include <linux/swapops.h>
  16#include <linux/thread_info.h>
  17#include <linux/types.h>
  18#include <linux/uio.h>
  19
  20#include <asm/barrier.h>
  21#include <asm/cpufeature.h>
  22#include <asm/mte.h>
  23#include <asm/ptrace.h>
  24#include <asm/sysreg.h>
  25
/*
 * Per-CPU preferred tag check fault mode. Initialised to MTE_CTRL_TCF_ASYNC
 * for every possible CPU by register_mte_tcf_preferred_sysctl(), changed to
 * MTE_CTRL_TCF_ASYNC or MTE_CTRL_TCF_SYNC by mte_tcf_preferred_store(), and
 * read on the local CPU by mte_update_sctlr_user().
 */
static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);

#ifdef CONFIG_KASAN_HW_TAGS
/*
 * Whether the MTE asynchronous mode is enabled. Starts out false and is
 * switched on by mte_enable_kernel_async().
 */
DEFINE_STATIC_KEY_FALSE(mte_async_mode);
EXPORT_SYMBOL_GPL(mte_async_mode);
#endif
  33
  34static void mte_sync_page_tags(struct page *page, pte_t old_pte,
  35                               bool check_swap, bool pte_is_tagged)
  36{
  37        if (check_swap && is_swap_pte(old_pte)) {
  38                swp_entry_t entry = pte_to_swp_entry(old_pte);
  39
  40                if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
  41                        return;
  42        }
  43
  44        if (!pte_is_tagged)
  45                return;
  46
  47        page_kasan_tag_reset(page);
  48        /*
  49         * We need smp_wmb() in between setting the flags and clearing the
  50         * tags because if another thread reads page->flags and builds a
  51         * tagged address out of it, there is an actual dependency to the
  52         * memory access, but on the current thread we do not guarantee that
  53         * the new page->flags are visible before the tags were updated.
  54         */
  55        smp_wmb();
  56        mte_clear_page_tags(page_address(page));
  57}
  58
  59void mte_sync_tags(pte_t old_pte, pte_t pte)
  60{
  61        struct page *page = pte_page(pte);
  62        long i, nr_pages = compound_nr(page);
  63        bool check_swap = nr_pages == 1;
  64        bool pte_is_tagged = pte_tagged(pte);
  65
  66        /* Early out if there's nothing to do */
  67        if (!check_swap && !pte_is_tagged)
  68                return;
  69
  70        /* if PG_mte_tagged is set, tags have already been initialised */
  71        for (i = 0; i < nr_pages; i++, page++) {
  72                if (!test_and_set_bit(PG_mte_tagged, &page->flags))
  73                        mte_sync_page_tags(page, old_pte, check_swap,
  74                                           pte_is_tagged);
  75        }
  76}
  77
  78int memcmp_pages(struct page *page1, struct page *page2)
  79{
  80        char *addr1, *addr2;
  81        int ret;
  82
  83        addr1 = page_address(page1);
  84        addr2 = page_address(page2);
  85        ret = memcmp(addr1, addr2, PAGE_SIZE);
  86
  87        if (!system_supports_mte() || ret)
  88                return ret;
  89
  90        /*
  91         * If the page content is identical but at least one of the pages is
  92         * tagged, return non-zero to avoid KSM merging. If only one of the
  93         * pages is tagged, set_pte_at() may zero or change the tags of the
  94         * other page via mte_sync_tags().
  95         */
  96        if (test_bit(PG_mte_tagged, &page1->flags) ||
  97            test_bit(PG_mte_tagged, &page2->flags))
  98                return addr1 != addr2;
  99
 100        return ret;
 101}
 102
/*
 * Program the tag check fault mode used at EL1.
 *
 * @mode: human-readable mode name, used only in the log message.
 * @tcf:  value written into the SCTLR_ELx_TCF_MASK field of SCTLR_EL1.
 */
static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
{
        /* Replace the EL1 tag check fault mode field with @tcf. */
        sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf);
        isb();

        pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
}
 111
 112#ifdef CONFIG_KASAN_HW_TAGS
/*
 * Switch the calling CPU's EL1 tag checking to synchronous mode. Warns once
 * if the system-wide async mode key has already been enabled.
 */
void mte_enable_kernel_sync(void)
{
        /*
         * Make sure we enter this function when no PE has set
         * async mode previously.
         */
        WARN_ONCE(system_uses_mte_async_mode(),
                        "MTE async mode enabled system wide!");

        __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
}
 124
 125void mte_enable_kernel_async(void)
 126{
 127        __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
 128
 129        /*
 130         * MTE async mode is set system wide by the first PE that
 131         * executes this function.
 132         *
 133         * Note: If in future KASAN acquires a runtime switching
 134         * mode in between sync and async, this strategy needs
 135         * to be reviewed.
 136         */
 137        if (!system_uses_mte_async_mode())
 138                static_branch_enable(&mte_async_mode);
 139}
 140#endif
 141
 142#ifdef CONFIG_KASAN_HW_TAGS
 143void mte_check_tfsr_el1(void)
 144{
 145        u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);
 146
 147        if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
 148                /*
 149                 * Note: isb() is not required after this direct write
 150                 * because there is no indirect read subsequent to it
 151                 * (per ARM DDI 0487F.c table D13-1).
 152                 */
 153                write_sysreg_s(0, SYS_TFSR_EL1);
 154
 155                kasan_report_async();
 156        }
 157}
 158#endif
 159
 160static void mte_update_sctlr_user(struct task_struct *task)
 161{
 162        /*
 163         * This must be called with preemption disabled and can only be called
 164         * on the current or next task since the CPU must match where the thread
 165         * is going to run. The caller is responsible for calling
 166         * update_sctlr_el1() later in the same preemption disabled block.
 167         */
 168        unsigned long sctlr = task->thread.sctlr_user;
 169        unsigned long mte_ctrl = task->thread.mte_ctrl;
 170        unsigned long pref, resolved_mte_tcf;
 171
 172        pref = __this_cpu_read(mte_tcf_preferred);
 173        resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
 174        sctlr &= ~SCTLR_EL1_TCF0_MASK;
 175        if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
 176                sctlr |= SCTLR_EL1_TCF0_ASYNC;
 177        else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
 178                sctlr |= SCTLR_EL1_TCF0_SYNC;
 179        task->thread.sctlr_user = sctlr;
 180}
 181
/*
 * Reset the current thread's user MTE state: TFSRE0_EL1 is zeroed,
 * TIF_MTE_ASYNC_FAULT is cleared and set_mte_ctrl() is called with 0.
 * Does nothing when the system does not support MTE.
 */
void mte_thread_init_user(void)
{
        if (!system_supports_mte())
                return;

        /* clear any pending asynchronous tag fault */
        dsb(ish);
        write_sysreg_s(0, SYS_TFSRE0_EL1);
        clear_thread_flag(TIF_MTE_ASYNC_FAULT);
        /* disable tag checking and reset tag generation mask */
        set_mte_ctrl(current, 0);
}
 194
/*
 * MTE work done when switching to @next: refresh @next's cached user SCTLR
 * value for this CPU, then check for (and report) an asynchronous tag
 * fault recorded at EL1. Does nothing when the system does not support MTE.
 */
void mte_thread_switch(struct task_struct *next)
{
        if (!system_supports_mte())
                return;

        mte_update_sctlr_user(next);

        /*
         * Check if an async tag exception occurred at EL1.
         *
         * Note: On the context switch path we rely on the dsb() present
         * in __switch_to() to guarantee that the indirect writes to TFSR_EL1
         * are synchronized before this point.
         */
        isb();
        mte_check_tfsr_el1();
}
 212
/*
 * Called on suspend entry: synchronise and then report any asynchronous tag
 * fault recorded in TFSR_EL1. Does nothing when the system does not support
 * MTE.
 */
void mte_suspend_enter(void)
{
        if (!system_supports_mte())
                return;

        /*
         * The barriers are required to guarantee that the indirect writes
         * to TFSR_EL1 are synchronized before we report the state.
         */
        dsb(nsh);
        isb();

        /* Report SYS_TFSR_EL1 before suspend entry */
        mte_check_tfsr_el1();
}
 228
 229long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 230{
 231        u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
 232                        SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;
 233
 234        if (!system_supports_mte())
 235                return 0;
 236
 237        if (arg & PR_MTE_TCF_ASYNC)
 238                mte_ctrl |= MTE_CTRL_TCF_ASYNC;
 239        if (arg & PR_MTE_TCF_SYNC)
 240                mte_ctrl |= MTE_CTRL_TCF_SYNC;
 241
 242        task->thread.mte_ctrl = mte_ctrl;
 243        if (task == current) {
 244                preempt_disable();
 245                mte_update_sctlr_user(task);
 246                update_sctlr_el1(task->thread.sctlr_user);
 247                preempt_enable();
 248        }
 249
 250        return 0;
 251}
 252
 253long get_mte_ctrl(struct task_struct *task)
 254{
 255        unsigned long ret;
 256        u64 mte_ctrl = task->thread.mte_ctrl;
 257        u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
 258                   SYS_GCR_EL1_EXCL_MASK;
 259
 260        if (!system_supports_mte())
 261                return 0;
 262
 263        ret = incl << PR_MTE_TAG_SHIFT;
 264        if (mte_ctrl & MTE_CTRL_TCF_ASYNC)
 265                ret |= PR_MTE_TCF_ASYNC;
 266        if (mte_ctrl & MTE_CTRL_TCF_SYNC)
 267                ret |= PR_MTE_TCF_SYNC;
 268
 269        return ret;
 270}
 271
 272/*
 273 * Access MTE tags in another process' address space as given in mm. Update
 274 * the number of tags copied. Return 0 if any tags copied, error otherwise.
 275 * Inspired by __access_remote_vm().
 276 */
 277static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
 278                                struct iovec *kiov, unsigned int gup_flags)
 279{
 280        struct vm_area_struct *vma;
 281        void __user *buf = kiov->iov_base;
 282        size_t len = kiov->iov_len;
 283        int ret;
 284        int write = gup_flags & FOLL_WRITE;
 285
 286        if (!access_ok(buf, len))
 287                return -EFAULT;
 288
 289        if (mmap_read_lock_killable(mm))
 290                return -EIO;
 291
 292        while (len) {
 293                unsigned long tags, offset;
 294                void *maddr;
 295                struct page *page = NULL;
 296
 297                ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
 298                                            &vma, NULL);
 299                if (ret <= 0)
 300                        break;
 301
 302                /*
 303                 * Only copy tags if the page has been mapped as PROT_MTE
 304                 * (PG_mte_tagged set). Otherwise the tags are not valid and
 305                 * not accessible to user. Moreover, an mprotect(PROT_MTE)
 306                 * would cause the existing tags to be cleared if the page
 307                 * was never mapped with PROT_MTE.
 308                 */
 309                if (!(vma->vm_flags & VM_MTE)) {
 310                        ret = -EOPNOTSUPP;
 311                        put_page(page);
 312                        break;
 313                }
 314                WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
 315
 316                /* limit access to the end of the page */
 317                offset = offset_in_page(addr);
 318                tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);
 319
 320                maddr = page_address(page);
 321                if (write) {
 322                        tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
 323                        set_page_dirty_lock(page);
 324                } else {
 325                        tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
 326                }
 327                put_page(page);
 328
 329                /* error accessing the tracer's buffer */
 330                if (!tags)
 331                        break;
 332
 333                len -= tags;
 334                buf += tags;
 335                addr += tags * MTE_GRANULE_SIZE;
 336        }
 337        mmap_read_unlock(mm);
 338
 339        /* return an error if no tags copied */
 340        kiov->iov_len = buf - kiov->iov_base;
 341        if (!kiov->iov_len) {
 342                /* check for error accessing the tracee's address space */
 343                if (ret <= 0)
 344                        return -EIO;
 345                else
 346                        return -EFAULT;
 347        }
 348
 349        return 0;
 350}
 351
 352/*
 353 * Copy MTE tags in another process' address space at 'addr' to/from tracer's
 354 * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
 355 */
 356static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
 357                              struct iovec *kiov, unsigned int gup_flags)
 358{
 359        struct mm_struct *mm;
 360        int ret;
 361
 362        mm = get_task_mm(tsk);
 363        if (!mm)
 364                return -EPERM;
 365
 366        if (!tsk->ptrace || (current != tsk->parent) ||
 367            ((get_dumpable(mm) != SUID_DUMP_USER) &&
 368             !ptracer_capable(tsk, mm->user_ns))) {
 369                mmput(mm);
 370                return -EPERM;
 371        }
 372
 373        ret = __access_remote_tags(mm, addr, kiov, gup_flags);
 374        mmput(mm);
 375
 376        return ret;
 377}
 378
 379int mte_ptrace_copy_tags(struct task_struct *child, long request,
 380                         unsigned long addr, unsigned long data)
 381{
 382        int ret;
 383        struct iovec kiov;
 384        struct iovec __user *uiov = (void __user *)data;
 385        unsigned int gup_flags = FOLL_FORCE;
 386
 387        if (!system_supports_mte())
 388                return -EIO;
 389
 390        if (get_user(kiov.iov_base, &uiov->iov_base) ||
 391            get_user(kiov.iov_len, &uiov->iov_len))
 392                return -EFAULT;
 393
 394        if (request == PTRACE_POKEMTETAGS)
 395                gup_flags |= FOLL_WRITE;
 396
 397        /* align addr to the MTE tag granule */
 398        addr &= MTE_GRANULE_MASK;
 399
 400        ret = access_remote_tags(child, addr, &kiov, gup_flags);
 401        if (!ret)
 402                ret = put_user(kiov.iov_len, &uiov->iov_len);
 403
 404        return ret;
 405}
 406
 407static ssize_t mte_tcf_preferred_show(struct device *dev,
 408                                      struct device_attribute *attr, char *buf)
 409{
 410        switch (per_cpu(mte_tcf_preferred, dev->id)) {
 411        case MTE_CTRL_TCF_ASYNC:
 412                return sysfs_emit(buf, "async\n");
 413        case MTE_CTRL_TCF_SYNC:
 414                return sysfs_emit(buf, "sync\n");
 415        default:
 416                return sysfs_emit(buf, "???\n");
 417        }
 418}
 419
 420static ssize_t mte_tcf_preferred_store(struct device *dev,
 421                                       struct device_attribute *attr,
 422                                       const char *buf, size_t count)
 423{
 424        u64 tcf;
 425
 426        if (sysfs_streq(buf, "async"))
 427                tcf = MTE_CTRL_TCF_ASYNC;
 428        else if (sysfs_streq(buf, "sync"))
 429                tcf = MTE_CTRL_TCF_SYNC;
 430        else
 431                return -EINVAL;
 432
 433        device_lock(dev);
 434        per_cpu(mte_tcf_preferred, dev->id) = tcf;
 435        device_unlock(dev);
 436
 437        return count;
 438}
 439static DEVICE_ATTR_RW(mte_tcf_preferred);
 440
 441static int register_mte_tcf_preferred_sysctl(void)
 442{
 443        unsigned int cpu;
 444
 445        if (!system_supports_mte())
 446                return 0;
 447
 448        for_each_possible_cpu(cpu) {
 449                per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
 450                device_create_file(get_cpu_device(cpu),
 451                                   &dev_attr_mte_tcf_preferred);
 452        }
 453
 454        return 0;
 455}
 456subsys_initcall(register_mte_tcf_preferred_sysctl);
 457