linux/arch/arm64/kernel/mte.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2020 ARM Ltd.
   4 */
   5
   6#include <linux/bitops.h>
   7#include <linux/cpu.h>
   8#include <linux/kernel.h>
   9#include <linux/mm.h>
  10#include <linux/prctl.h>
  11#include <linux/sched.h>
  12#include <linux/sched/mm.h>
  13#include <linux/string.h>
  14#include <linux/swap.h>
  15#include <linux/swapops.h>
  16#include <linux/thread_info.h>
  17#include <linux/types.h>
  18#include <linux/uio.h>
  19
  20#include <asm/barrier.h>
  21#include <asm/cpufeature.h>
  22#include <asm/mte.h>
  23#include <asm/ptrace.h>
  24#include <asm/sysreg.h>
  25
/*
 * Per-CPU preferred tag check fault mode, stored as one of the
 * MTE_CTRL_TCF_* values. It is read when a task's requested modes are
 * resolved into SCTLR_EL1.TCF0 and it is exposed for reading and writing
 * through the per-CPU "mte_tcf_preferred" device attribute.
 */
static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);

#ifdef CONFIG_KASAN_HW_TAGS
/*
 * The asynchronous and asymmetric MTE modes have the same behavior for
 * store operations. This flag is set when either of these modes is enabled.
 */
DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
  36
  37static void mte_sync_page_tags(struct page *page, pte_t old_pte,
  38                               bool check_swap, bool pte_is_tagged)
  39{
  40        if (check_swap && is_swap_pte(old_pte)) {
  41                swp_entry_t entry = pte_to_swp_entry(old_pte);
  42
  43                if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
  44                        return;
  45        }
  46
  47        if (!pte_is_tagged)
  48                return;
  49
  50        page_kasan_tag_reset(page);
  51        /*
  52         * We need smp_wmb() in between setting the flags and clearing the
  53         * tags because if another thread reads page->flags and builds a
  54         * tagged address out of it, there is an actual dependency to the
  55         * memory access, but on the current thread we do not guarantee that
  56         * the new page->flags are visible before the tags were updated.
  57         */
  58        smp_wmb();
  59        mte_clear_page_tags(page_address(page));
  60}
  61
  62void mte_sync_tags(pte_t old_pte, pte_t pte)
  63{
  64        struct page *page = pte_page(pte);
  65        long i, nr_pages = compound_nr(page);
  66        bool check_swap = nr_pages == 1;
  67        bool pte_is_tagged = pte_tagged(pte);
  68
  69        /* Early out if there's nothing to do */
  70        if (!check_swap && !pte_is_tagged)
  71                return;
  72
  73        /* if PG_mte_tagged is set, tags have already been initialised */
  74        for (i = 0; i < nr_pages; i++, page++) {
  75                if (!test_and_set_bit(PG_mte_tagged, &page->flags))
  76                        mte_sync_page_tags(page, old_pte, check_swap,
  77                                           pte_is_tagged);
  78        }
  79
  80        /* ensure the tags are visible before the PTE is set */
  81        smp_wmb();
  82}
  83
  84int memcmp_pages(struct page *page1, struct page *page2)
  85{
  86        char *addr1, *addr2;
  87        int ret;
  88
  89        addr1 = page_address(page1);
  90        addr2 = page_address(page2);
  91        ret = memcmp(addr1, addr2, PAGE_SIZE);
  92
  93        if (!system_supports_mte() || ret)
  94                return ret;
  95
  96        /*
  97         * If the page content is identical but at least one of the pages is
  98         * tagged, return non-zero to avoid KSM merging. If only one of the
  99         * pages is tagged, set_pte_at() may zero or change the tags of the
 100         * other page via mte_sync_tags().
 101         */
 102        if (test_bit(PG_mte_tagged, &page1->flags) ||
 103            test_bit(PG_mte_tagged, &page2->flags))
 104                return addr1 != addr2;
 105
 106        return ret;
 107}
 108
/*
 * Program the EL1 tag check fault mode.
 *
 * @mode: human-readable mode name, used only in the log message
 * @tcf:  value written into the SCTLR_EL1 TCF field
 *
 * The message is printed at most once (pr_info_once).
 */
static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
{
        /* Replace the SCTLR_EL1.TCF field with the requested mode. */
        sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf);
        isb();

        pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
}
 117
 118#ifdef CONFIG_KASAN_HW_TAGS
 119void mte_enable_kernel_sync(void)
 120{
 121        /*
 122         * Make sure we enter this function when no PE has set
 123         * async mode previously.
 124         */
 125        WARN_ONCE(system_uses_mte_async_or_asymm_mode(),
 126                        "MTE async mode enabled system wide!");
 127
 128        __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
 129}
 130
 131void mte_enable_kernel_async(void)
 132{
 133        __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
 134
 135        /*
 136         * MTE async mode is set system wide by the first PE that
 137         * executes this function.
 138         *
 139         * Note: If in future KASAN acquires a runtime switching
 140         * mode in between sync and async, this strategy needs
 141         * to be reviewed.
 142         */
 143        if (!system_uses_mte_async_or_asymm_mode())
 144                static_branch_enable(&mte_async_or_asymm_mode);
 145}
 146
 147void mte_enable_kernel_asymm(void)
 148{
 149        if (cpus_have_cap(ARM64_MTE_ASYMM)) {
 150                __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM);
 151
 152                /*
 153                 * MTE asymm mode behaves as async mode for store
 154                 * operations. The mode is set system wide by the
 155                 * first PE that executes this function.
 156                 *
 157                 * Note: If in future KASAN acquires a runtime switching
 158                 * mode in between sync and async, this strategy needs
 159                 * to be reviewed.
 160                 */
 161                if (!system_uses_mte_async_or_asymm_mode())
 162                        static_branch_enable(&mte_async_or_asymm_mode);
 163        } else {
 164                /*
 165                 * If the CPU does not support MTE asymmetric mode the
 166                 * kernel falls back on synchronous mode which is the
 167                 * default for kasan=on.
 168                 */
 169                mte_enable_kernel_sync();
 170        }
 171}
 172#endif
 173
 174#ifdef CONFIG_KASAN_HW_TAGS
 175void mte_check_tfsr_el1(void)
 176{
 177        u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);
 178
 179        if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
 180                /*
 181                 * Note: isb() is not required after this direct write
 182                 * because there is no indirect read subsequent to it
 183                 * (per ARM DDI 0487F.c table D13-1).
 184                 */
 185                write_sysreg_s(0, SYS_TFSR_EL1);
 186
 187                kasan_report_async();
 188        }
 189}
 190#endif
 191
 192/*
 193 * This is where we actually resolve the system and process MTE mode
 194 * configuration into an actual value in SCTLR_EL1 that affects
 195 * userspace.
 196 */
 197static void mte_update_sctlr_user(struct task_struct *task)
 198{
 199        /*
 200         * This must be called with preemption disabled and can only be called
 201         * on the current or next task since the CPU must match where the thread
 202         * is going to run. The caller is responsible for calling
 203         * update_sctlr_el1() later in the same preemption disabled block.
 204         */
 205        unsigned long sctlr = task->thread.sctlr_user;
 206        unsigned long mte_ctrl = task->thread.mte_ctrl;
 207        unsigned long pref, resolved_mte_tcf;
 208
 209        pref = __this_cpu_read(mte_tcf_preferred);
 210        /*
 211         * If there is no overlap between the system preferred and
 212         * program requested values go with what was requested.
 213         */
 214        resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
 215        sctlr &= ~SCTLR_EL1_TCF0_MASK;
 216        /*
 217         * Pick an actual setting. The order in which we check for
 218         * set bits and map into register values determines our
 219         * default order.
 220         */
 221        if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
 222                sctlr |= SCTLR_EL1_TCF0_ASYMM;
 223        else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
 224                sctlr |= SCTLR_EL1_TCF0_ASYNC;
 225        else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
 226                sctlr |= SCTLR_EL1_TCF0_SYNC;
 227        task->thread.sctlr_user = sctlr;
 228}
 229
 230static void mte_update_gcr_excl(struct task_struct *task)
 231{
 232        /*
 233         * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by
 234         * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled.
 235         */
 236        if (kasan_hw_tags_enabled())
 237                return;
 238
 239        write_sysreg_s(
 240                ((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
 241                 SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND,
 242                SYS_GCR_EL1);
 243}
 244
 245void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
 246                                 __le32 *updptr, int nr_inst)
 247{
 248        BUG_ON(nr_inst != 1); /* Branch -> NOP */
 249
 250        if (kasan_hw_tags_enabled())
 251                *updptr = cpu_to_le32(aarch64_insn_gen_nop());
 252}
 253
 254void mte_thread_init_user(void)
 255{
 256        if (!system_supports_mte())
 257                return;
 258
 259        /* clear any pending asynchronous tag fault */
 260        dsb(ish);
 261        write_sysreg_s(0, SYS_TFSRE0_EL1);
 262        clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 263        /* disable tag checking and reset tag generation mask */
 264        set_mte_ctrl(current, 0);
 265}
 266
 267void mte_thread_switch(struct task_struct *next)
 268{
 269        if (!system_supports_mte())
 270                return;
 271
 272        mte_update_sctlr_user(next);
 273        mte_update_gcr_excl(next);
 274
 275        /* TCO may not have been disabled on exception entry for the current task. */
 276        mte_disable_tco_entry(next);
 277
 278        /*
 279         * Check if an async tag exception occurred at EL1.
 280         *
 281         * Note: On the context switch path we rely on the dsb() present
 282         * in __switch_to() to guarantee that the indirect writes to TFSR_EL1
 283         * are synchronized before this point.
 284         */
 285        isb();
 286        mte_check_tfsr_el1();
 287}
 288
 289void mte_suspend_enter(void)
 290{
 291        if (!system_supports_mte())
 292                return;
 293
 294        /*
 295         * The barriers are required to guarantee that the indirect writes
 296         * to TFSR_EL1 are synchronized before we report the state.
 297         */
 298        dsb(nsh);
 299        isb();
 300
 301        /* Report SYS_TFSR_EL1 before suspend entry */
 302        mte_check_tfsr_el1();
 303}
 304
 305long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 306{
 307        u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
 308                        SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;
 309
 310        if (!system_supports_mte())
 311                return 0;
 312
 313        if (arg & PR_MTE_TCF_ASYNC)
 314                mte_ctrl |= MTE_CTRL_TCF_ASYNC;
 315        if (arg & PR_MTE_TCF_SYNC)
 316                mte_ctrl |= MTE_CTRL_TCF_SYNC;
 317
 318        /*
 319         * If the system supports it and both sync and async modes are
 320         * specified then implicitly enable asymmetric mode.
 321         * Userspace could see a mix of both sync and async anyway due
 322         * to differing or changing defaults on CPUs.
 323         */
 324        if (cpus_have_cap(ARM64_MTE_ASYMM) &&
 325            (arg & PR_MTE_TCF_ASYNC) &&
 326            (arg & PR_MTE_TCF_SYNC))
 327                mte_ctrl |= MTE_CTRL_TCF_ASYMM;
 328
 329        task->thread.mte_ctrl = mte_ctrl;
 330        if (task == current) {
 331                preempt_disable();
 332                mte_update_sctlr_user(task);
 333                mte_update_gcr_excl(task);
 334                update_sctlr_el1(task->thread.sctlr_user);
 335                preempt_enable();
 336        }
 337
 338        return 0;
 339}
 340
 341long get_mte_ctrl(struct task_struct *task)
 342{
 343        unsigned long ret;
 344        u64 mte_ctrl = task->thread.mte_ctrl;
 345        u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
 346                   SYS_GCR_EL1_EXCL_MASK;
 347
 348        if (!system_supports_mte())
 349                return 0;
 350
 351        ret = incl << PR_MTE_TAG_SHIFT;
 352        if (mte_ctrl & MTE_CTRL_TCF_ASYNC)
 353                ret |= PR_MTE_TCF_ASYNC;
 354        if (mte_ctrl & MTE_CTRL_TCF_SYNC)
 355                ret |= PR_MTE_TCF_SYNC;
 356
 357        return ret;
 358}
 359
 360/*
 361 * Access MTE tags in another process' address space as given in mm. Update
 362 * the number of tags copied. Return 0 if any tags copied, error otherwise.
 363 * Inspired by __access_remote_vm().
 364 */
 365static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
 366                                struct iovec *kiov, unsigned int gup_flags)
 367{
 368        struct vm_area_struct *vma;
 369        void __user *buf = kiov->iov_base;
 370        size_t len = kiov->iov_len;
 371        int ret;
 372        int write = gup_flags & FOLL_WRITE;
 373
 374        if (!access_ok(buf, len))
 375                return -EFAULT;
 376
 377        if (mmap_read_lock_killable(mm))
 378                return -EIO;
 379
 380        while (len) {
 381                unsigned long tags, offset;
 382                void *maddr;
 383                struct page *page = NULL;
 384
 385                ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
 386                                            &vma, NULL);
 387                if (ret <= 0)
 388                        break;
 389
 390                /*
 391                 * Only copy tags if the page has been mapped as PROT_MTE
 392                 * (PG_mte_tagged set). Otherwise the tags are not valid and
 393                 * not accessible to user. Moreover, an mprotect(PROT_MTE)
 394                 * would cause the existing tags to be cleared if the page
 395                 * was never mapped with PROT_MTE.
 396                 */
 397                if (!(vma->vm_flags & VM_MTE)) {
 398                        ret = -EOPNOTSUPP;
 399                        put_page(page);
 400                        break;
 401                }
 402                WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
 403
 404                /* limit access to the end of the page */
 405                offset = offset_in_page(addr);
 406                tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);
 407
 408                maddr = page_address(page);
 409                if (write) {
 410                        tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
 411                        set_page_dirty_lock(page);
 412                } else {
 413                        tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
 414                }
 415                put_page(page);
 416
 417                /* error accessing the tracer's buffer */
 418                if (!tags)
 419                        break;
 420
 421                len -= tags;
 422                buf += tags;
 423                addr += tags * MTE_GRANULE_SIZE;
 424        }
 425        mmap_read_unlock(mm);
 426
 427        /* return an error if no tags copied */
 428        kiov->iov_len = buf - kiov->iov_base;
 429        if (!kiov->iov_len) {
 430                /* check for error accessing the tracee's address space */
 431                if (ret <= 0)
 432                        return -EIO;
 433                else
 434                        return -EFAULT;
 435        }
 436
 437        return 0;
 438}
 439
 440/*
 441 * Copy MTE tags in another process' address space at 'addr' to/from tracer's
 442 * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
 443 */
 444static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
 445                              struct iovec *kiov, unsigned int gup_flags)
 446{
 447        struct mm_struct *mm;
 448        int ret;
 449
 450        mm = get_task_mm(tsk);
 451        if (!mm)
 452                return -EPERM;
 453
 454        if (!tsk->ptrace || (current != tsk->parent) ||
 455            ((get_dumpable(mm) != SUID_DUMP_USER) &&
 456             !ptracer_capable(tsk, mm->user_ns))) {
 457                mmput(mm);
 458                return -EPERM;
 459        }
 460
 461        ret = __access_remote_tags(mm, addr, kiov, gup_flags);
 462        mmput(mm);
 463
 464        return ret;
 465}
 466
 467int mte_ptrace_copy_tags(struct task_struct *child, long request,
 468                         unsigned long addr, unsigned long data)
 469{
 470        int ret;
 471        struct iovec kiov;
 472        struct iovec __user *uiov = (void __user *)data;
 473        unsigned int gup_flags = FOLL_FORCE;
 474
 475        if (!system_supports_mte())
 476                return -EIO;
 477
 478        if (get_user(kiov.iov_base, &uiov->iov_base) ||
 479            get_user(kiov.iov_len, &uiov->iov_len))
 480                return -EFAULT;
 481
 482        if (request == PTRACE_POKEMTETAGS)
 483                gup_flags |= FOLL_WRITE;
 484
 485        /* align addr to the MTE tag granule */
 486        addr &= MTE_GRANULE_MASK;
 487
 488        ret = access_remote_tags(child, addr, &kiov, gup_flags);
 489        if (!ret)
 490                ret = put_user(kiov.iov_len, &uiov->iov_len);
 491
 492        return ret;
 493}
 494
 495static ssize_t mte_tcf_preferred_show(struct device *dev,
 496                                      struct device_attribute *attr, char *buf)
 497{
 498        switch (per_cpu(mte_tcf_preferred, dev->id)) {
 499        case MTE_CTRL_TCF_ASYNC:
 500                return sysfs_emit(buf, "async\n");
 501        case MTE_CTRL_TCF_SYNC:
 502                return sysfs_emit(buf, "sync\n");
 503        case MTE_CTRL_TCF_ASYMM:
 504                return sysfs_emit(buf, "asymm\n");
 505        default:
 506                return sysfs_emit(buf, "???\n");
 507        }
 508}
 509
 510static ssize_t mte_tcf_preferred_store(struct device *dev,
 511                                       struct device_attribute *attr,
 512                                       const char *buf, size_t count)
 513{
 514        u64 tcf;
 515
 516        if (sysfs_streq(buf, "async"))
 517                tcf = MTE_CTRL_TCF_ASYNC;
 518        else if (sysfs_streq(buf, "sync"))
 519                tcf = MTE_CTRL_TCF_SYNC;
 520        else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm"))
 521                tcf = MTE_CTRL_TCF_ASYMM;
 522        else
 523                return -EINVAL;
 524
 525        device_lock(dev);
 526        per_cpu(mte_tcf_preferred, dev->id) = tcf;
 527        device_unlock(dev);
 528
 529        return count;
 530}
 531static DEVICE_ATTR_RW(mte_tcf_preferred);
 532
 533static int register_mte_tcf_preferred_sysctl(void)
 534{
 535        unsigned int cpu;
 536
 537        if (!system_supports_mte())
 538                return 0;
 539
 540        for_each_possible_cpu(cpu) {
 541                per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
 542                device_create_file(get_cpu_device(cpu),
 543                                   &dev_attr_mte_tcf_preferred);
 544        }
 545
 546        return 0;
 547}
 548subsys_initcall(register_mte_tcf_preferred_sysctl);
 549