linux/arch/arm64/kernel/fpsimd.c
   1/*
   2 * FP/SIMD context switching and fault handling
   3 *
   4 * Copyright (C) 2012 ARM Ltd.
   5 * Author: Catalin Marinas <catalin.marinas@arm.com>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include <linux/bitmap.h>
  21#include <linux/bottom_half.h>
  22#include <linux/bug.h>
  23#include <linux/cache.h>
  24#include <linux/compat.h>
  25#include <linux/cpu.h>
  26#include <linux/cpu_pm.h>
  27#include <linux/kernel.h>
  28#include <linux/linkage.h>
  29#include <linux/irqflags.h>
  30#include <linux/init.h>
  31#include <linux/percpu.h>
  32#include <linux/prctl.h>
  33#include <linux/preempt.h>
  35#include <linux/ptrace.h>
  36#include <linux/sched/signal.h>
  37#include <linux/sched/task_stack.h>
  38#include <linux/signal.h>
  39#include <linux/slab.h>
  40#include <linux/sysctl.h>
  41
  42#include <asm/esr.h>
  43#include <asm/fpsimd.h>
  44#include <asm/cpufeature.h>
  45#include <asm/cputype.h>
  46#include <asm/simd.h>
  47#include <asm/sigcontext.h>
  48#include <asm/sysreg.h>
  49#include <asm/traps.h>
  50
  51#define FPEXC_IOF       (1 << 0)
  52#define FPEXC_DZF       (1 << 1)
  53#define FPEXC_OFF       (1 << 2)
  54#define FPEXC_UFF       (1 << 3)
  55#define FPEXC_IXF       (1 << 4)
  56#define FPEXC_IDF       (1 << 7)
  57
  58/*
  59 * (Note: in this discussion, statements about FPSIMD apply equally to SVE.)
  60 *
  61 * In order to reduce the number of times the FPSIMD state is needlessly saved
  62 * and restored, we need to keep track of two things:
  63 * (a) for each task, we need to remember which CPU was the last one to have
  64 *     the task's FPSIMD state loaded into its FPSIMD registers;
  65 * (b) for each CPU, we need to remember which task's userland FPSIMD state has
  66 *     been loaded into its FPSIMD registers most recently, or whether it has
  67 *     been used to perform kernel mode NEON in the meantime.
  68 *
  69 * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to
  70 * the id of the current CPU every time the state is loaded onto a CPU. For (b),
  71 * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
  72 * address of the userland FPSIMD state of the task that was loaded onto the CPU
  73 * the most recently, or NULL if kernel mode NEON has been performed after that.
  74 *
  75 * With this in place, we no longer have to restore the next FPSIMD state right
  76 * when switching between tasks. Instead, we can defer this check to userland
  77 * resume, at which time we verify whether the CPU's fpsimd_last_state and the
  78 * task's fpsimd_cpu are still mutually in sync. If this is the case, we
  79 * can omit the FPSIMD restore.
  80 *
  81 * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
  82 * indicate whether or not the userland FPSIMD state of the current task is
  83 * present in the registers. The flag is set unless the FPSIMD registers of this
  84 * CPU currently contain the most recent userland FPSIMD state of the current
  85 * task.
  86 *
  87 * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
  88 * save the task's FPSIMD context back to task_struct from softirq context.
  89 * To prevent this from racing with the manipulation of the task's FPSIMD state
  90 * from task context and thereby corrupting the state, it is necessary to
  91 * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
  92 * flag with local_bh_disable() unless softirqs are already masked.
  93 *
  94 * For a certain task, the sequence may look something like this:
   95 * - the task gets scheduled in; if the task's fpsimd_cpu field contains
   96 *   the id of the current CPU and the CPU's fpsimd_last_state per-cpu
   97 *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE
   98 *   flag is cleared; otherwise it is set;
  99 *
 100 * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
 101 *   userland FPSIMD state is copied from memory to the registers, the task's
 102 *   fpsimd_cpu field is set to the id of the current CPU, the current
 103 *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
 104 *   TIF_FOREIGN_FPSTATE flag is cleared;
 105 *
 106 * - the task executes an ordinary syscall; upon return to userland, the
 107 *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
 108 *   restored;
 109 *
 110 * - the task executes a syscall which executes some NEON instructions; this is
 111 *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
 112 *   register contents to memory, clears the fpsimd_last_state per-cpu variable
 113 *   and sets the TIF_FOREIGN_FPSTATE flag;
 114 *
 115 * - the task gets preempted after kernel_neon_end() is called; as we have not
 116 *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
 117 *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
 118 */
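
     /*
      * Illustrative sketch (not compiled): the (a)/(b) bookkeeping described
      * above boils down to checking both directions of the association,
      * roughly:
      *
      *   if (__this_cpu_read(fpsimd_last_state.st) ==
      *                       &next->thread.uw.fpsimd_state &&
      *       next->thread.fpsimd_cpu == smp_processor_id())
      *           clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
      *   else
      *           set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
      *
      * fpsimd_thread_switch() below is the real implementation of this test.
      */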
 119struct fpsimd_last_state_struct {
 120        struct user_fpsimd_state *st;
 121        bool sve_in_use;
 122};
 123
 124static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
 125
 126/* Default VL for tasks that don't set it explicitly: */
 127static int sve_default_vl = -1;
 128
 129#ifdef CONFIG_ARM64_SVE
 130
 131/* Maximum supported vector length across all CPUs (initially poisoned) */
 132int __ro_after_init sve_max_vl = -1;
 133/* Set of available vector lengths, as vq_to_bit(vq): */
 134static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
 135static void __percpu *efi_sve_state;
 136
 137#else /* ! CONFIG_ARM64_SVE */
 138
 139/* Dummy declaration for code that will be optimised out: */
 140extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
 141extern void __percpu *efi_sve_state;
 142
 143#endif /* ! CONFIG_ARM64_SVE */
 144
 145/*
 146 * Call __sve_free() directly only if you know task can't be scheduled
 147 * or preempted.
 148 */
 149static void __sve_free(struct task_struct *task)
 150{
 151        kfree(task->thread.sve_state);
 152        task->thread.sve_state = NULL;
 153}
 154
 155static void sve_free(struct task_struct *task)
 156{
 157        WARN_ON(test_tsk_thread_flag(task, TIF_SVE));
 158
 159        __sve_free(task);
 160}
 161
 162
 163/* Offset of FFR in the SVE register dump */
 164static size_t sve_ffr_offset(int vl)
 165{
 166        return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
 167}
 168
 169static void *sve_pffr(struct task_struct *task)
 170{
 171        return (char *)task->thread.sve_state +
 172                sve_ffr_offset(task->thread.sve_vl);
 173}
 174
 175static void change_cpacr(u64 val, u64 mask)
 176{
 177        u64 cpacr = read_sysreg(CPACR_EL1);
 178        u64 new = (cpacr & ~mask) | val;
 179
 180        if (new != cpacr)
 181                write_sysreg(new, CPACR_EL1);
 182}
 183
 184static void sve_user_disable(void)
 185{
 186        change_cpacr(0, CPACR_EL1_ZEN_EL0EN);
 187}
 188
 189static void sve_user_enable(void)
 190{
 191        change_cpacr(CPACR_EL1_ZEN_EL0EN, CPACR_EL1_ZEN_EL0EN);
 192}
 193
 194/*
 195 * TIF_SVE controls whether a task can use SVE without trapping while
 196 * in userspace, and also the way a task's FPSIMD/SVE state is stored
 197 * in thread_struct.
 198 *
 199 * The kernel uses this flag to track whether a user task is actively
 200 * using SVE, and therefore whether full SVE register state needs to
 201 * be tracked.  If not, the cheaper FPSIMD context handling code can
 202 * be used instead of the more costly SVE equivalents.
 203 *
 204 *  * TIF_SVE set:
 205 *
 206 *    The task can execute SVE instructions while in userspace without
 207 *    trapping to the kernel.
 208 *
  209 *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] of the
  210 *    corresponding Zn), P0-P15 and FFR are encoded in
 211 *    task->thread.sve_state, formatted appropriately for vector
 212 *    length task->thread.sve_vl.
 213 *
 214 *    task->thread.sve_state must point to a valid buffer at least
 215 *    sve_state_size(task) bytes in size.
 216 *
 217 *    During any syscall, the kernel may optionally clear TIF_SVE and
 218 *    discard the vector state except for the FPSIMD subset.
 219 *
 220 *  * TIF_SVE clear:
 221 *
 222 *    An attempt by the user task to execute an SVE instruction causes
 223 *    do_sve_acc() to be called, which does some preparation and then
 224 *    sets TIF_SVE.
 225 *
 226 *    When stored, FPSIMD registers V0-V31 are encoded in
 227 *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
 228 *    logically zero but not stored anywhere; P0-P15 and FFR are not
 229 *    stored and have unspecified values from userspace's point of
 230 *    view.  For hygiene purposes, the kernel zeroes them on next use,
 231 *    but userspace is discouraged from relying on this.
 232 *
 233 *    task->thread.sve_state does not need to be non-NULL, valid or any
 234 *    particular size: it must not be dereferenced.
 235 *
 236 *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
 237 *    irrespective of whether TIF_SVE is clear or set, since these are
 238 *    not vector length dependent.
 239 */
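
     /*
      * Worked example (illustrative): consider a task whose sve_vl is 32
      * bytes.  While TIF_SVE is clear, its vector state lives purely in
      * task->thread.uw.fpsimd_state as 16-byte V registers.  The first SVE
      * instruction it executes traps to do_sve_acc(), which allocates
      * sve_state, copies each Vn into bits [127:0] of the corresponding Zn
      * via fpsimd_to_sve(), and sets TIF_SVE.  From then on the full
      * Z/P/FFR state is saved and restored, until a syscall or exec lets
      * the kernel fall back to the cheaper FPSIMD-only representation as
      * described above.
      */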
 240
 241/*
 242 * Update current's FPSIMD/SVE registers from thread_struct.
 243 *
 244 * This function should be called only when the FPSIMD/SVE state in
 245 * thread_struct is known to be up to date, when preparing to enter
 246 * userspace.
 247 *
 248 * Softirqs (and preemption) must be disabled.
 249 */
 250static void task_fpsimd_load(void)
 251{
 252        WARN_ON(!in_softirq() && !irqs_disabled());
 253
 254        if (system_supports_sve() && test_thread_flag(TIF_SVE))
 255                sve_load_state(sve_pffr(current),
 256                               &current->thread.uw.fpsimd_state.fpsr,
 257                               sve_vq_from_vl(current->thread.sve_vl) - 1);
 258        else
 259                fpsimd_load_state(&current->thread.uw.fpsimd_state);
 260
 261        if (system_supports_sve()) {
 262                /* Toggle SVE trapping for userspace if needed */
 263                if (test_thread_flag(TIF_SVE))
 264                        sve_user_enable();
 265                else
 266                        sve_user_disable();
 267
 268                /* Serialised by exception return to user */
 269        }
 270}
 271
 272/*
 273 * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
 274 * with respect to the CPU registers.
 275 *
 276 * Softirqs (and preemption) must be disabled.
 277 */
 278static void task_fpsimd_save(void)
 279{
 280        WARN_ON(!in_softirq() && !irqs_disabled());
 281
 282        if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
 283                if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
 284                        if (WARN_ON(sve_get_vl() != current->thread.sve_vl)) {
 285                                /*
 286                                 * Can't save the user regs, so current would
 287                                 * re-enter user with corrupt state.
 288                                 * There's no way to recover, so kill it:
 289                                 */
 290                                force_signal_inject(SIGKILL, SI_KERNEL, 0);
 291                                return;
 292                        }
 293
 294                        sve_save_state(sve_pffr(current),
 295                                       &current->thread.uw.fpsimd_state.fpsr);
 296                } else
 297                        fpsimd_save_state(&current->thread.uw.fpsimd_state);
 298        }
 299}
 300
 301/*
 302 * Helpers to translate bit indices in sve_vq_map to VQ values (and
 303 * vice versa).  This allows find_next_bit() to be used to find the
 304 * _maximum_ VQ not exceeding a certain value.
 305 */
 306
 307static unsigned int vq_to_bit(unsigned int vq)
 308{
 309        return SVE_VQ_MAX - vq;
 310}
 311
 312static unsigned int bit_to_vq(unsigned int bit)
 313{
 314        if (WARN_ON(bit >= SVE_VQ_MAX))
 315                bit = SVE_VQ_MAX - 1;
 316
 317        return SVE_VQ_MAX - bit;
 318}
 319
 320/*
 321 * All vector length selection from userspace comes through here.
 322 * We're on a slow path, so some sanity-checks are included.
 323 * If things go wrong there's a bug somewhere, but try to fall back to a
 324 * safe choice.
 325 */
 326static unsigned int find_supported_vector_length(unsigned int vl)
 327{
 328        int bit;
 329        int max_vl = sve_max_vl;
 330
 331        if (WARN_ON(!sve_vl_valid(vl)))
 332                vl = SVE_VL_MIN;
 333
 334        if (WARN_ON(!sve_vl_valid(max_vl)))
 335                max_vl = SVE_VL_MIN;
 336
 337        if (vl > max_vl)
 338                vl = max_vl;
 339
 340        bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
 341                            vq_to_bit(sve_vq_from_vl(vl)));
 342        return sve_vl_from_vq(bit_to_vq(bit));
 343}
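
     /*
      * Worked example (hypothetical VQ set): suppose the CPUs support only
      * VQ 1, 2 and 4 (VL 16, 32 and 64 bytes).  These are recorded at bits
      * SVE_VQ_MAX - 1, SVE_VQ_MAX - 2 and SVE_VQ_MAX - 4 of sve_vq_map, so
      * larger VQs occupy lower bit positions.  A request for VL 48 (VQ 3)
      * starts the search at vq_to_bit(3) = SVE_VQ_MAX - 3; find_next_bit()
      * returns SVE_VQ_MAX - 2, i.e. VQ 2, so the caller gets 32 bytes: the
      * largest supported vector length not exceeding the request.
      */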
 344
 345#ifdef CONFIG_SYSCTL
 346
 347static int sve_proc_do_default_vl(struct ctl_table *table, int write,
 348                                  void __user *buffer, size_t *lenp,
 349                                  loff_t *ppos)
 350{
 351        int ret;
 352        int vl = sve_default_vl;
 353        struct ctl_table tmp_table = {
 354                .data = &vl,
 355                .maxlen = sizeof(vl),
 356        };
 357
 358        ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
 359        if (ret || !write)
 360                return ret;
 361
 362        /* Writing -1 has the special meaning "set to max": */
 363        if (vl == -1) {
 364                /* Fail safe if sve_max_vl wasn't initialised */
 365                if (WARN_ON(!sve_vl_valid(sve_max_vl)))
 366                        vl = SVE_VL_MIN;
 367                else
 368                        vl = sve_max_vl;
 369
 370                goto chosen;
 371        }
 372
 373        if (!sve_vl_valid(vl))
 374                return -EINVAL;
 375
 376        vl = find_supported_vector_length(vl);
 377chosen:
 378        sve_default_vl = vl;
 379        return 0;
 380}
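
     /*
      * Example usage (illustrative): once sve_sysctl_init() has registered
      * the "abi" table below, the default vector length is exposed via
      * procfs, e.g.
      *
      *   # cat /proc/sys/abi/sve_default_vector_length
      *   # echo 32 > /proc/sys/abi/sve_default_vector_length
      *   # echo -1 > /proc/sys/abi/sve_default_vector_length
      *
      * A written value is rounded by find_supported_vector_length(), so the
      * value read back may differ from the value written; -1 selects the
      * maximum supported vector length.
      */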
 381
 382static struct ctl_table sve_default_vl_table[] = {
 383        {
 384                .procname       = "sve_default_vector_length",
 385                .mode           = 0644,
 386                .proc_handler   = sve_proc_do_default_vl,
 387        },
 388        { }
 389};
 390
 391static int __init sve_sysctl_init(void)
 392{
 393        if (system_supports_sve())
 394                if (!register_sysctl("abi", sve_default_vl_table))
 395                        return -EINVAL;
 396
 397        return 0;
 398}
 399
 400#else /* ! CONFIG_SYSCTL */
 401static int __init sve_sysctl_init(void) { return 0; }
 402#endif /* ! CONFIG_SYSCTL */
 403
 404#define ZREG(sve_state, vq, n) ((char *)(sve_state) +           \
 405        (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
 406
 407/*
 408 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
 409 * task->thread.sve_state.
 410 *
 411 * Task can be a non-runnable task, or current.  In the latter case,
 412 * softirqs (and preemption) must be disabled.
 413 * task->thread.sve_state must point to at least sve_state_size(task)
 414 * bytes of allocated kernel memory.
 415 * task->thread.uw.fpsimd_state must be up to date before calling this
 416 * function.
 417 */
 418static void fpsimd_to_sve(struct task_struct *task)
 419{
 420        unsigned int vq;
 421        void *sst = task->thread.sve_state;
 422        struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 423        unsigned int i;
 424
 425        if (!system_supports_sve())
 426                return;
 427
 428        vq = sve_vq_from_vl(task->thread.sve_vl);
 429        for (i = 0; i < 32; ++i)
 430                memcpy(ZREG(sst, vq, i), &fst->vregs[i],
 431                       sizeof(fst->vregs[i]));
 432}
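
     /*
      * Layout sketch (illustrative): for a task with sve_vl == 32 (VQ 2),
      * each Zn slot in sve_state spans VQ * 16 == 32 bytes.  The loop above
      * copies the 16-byte Vn into the low 16 bytes of that slot (bits
      * [127:0] of Zn); the upper bytes of the slot are left as whatever the
      * buffer already held, which is why callers are expected to have
      * zeroed or fully initialised sve_state beforehand.
      */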
 433
 434/*
 435 * Transfer the SVE state in task->thread.sve_state to
 436 * task->thread.uw.fpsimd_state.
 437 *
 438 * Task can be a non-runnable task, or current.  In the latter case,
 439 * softirqs (and preemption) must be disabled.
 440 * task->thread.sve_state must point to at least sve_state_size(task)
 441 * bytes of allocated kernel memory.
 442 * task->thread.sve_state must be up to date before calling this function.
 443 */
 444static void sve_to_fpsimd(struct task_struct *task)
 445{
 446        unsigned int vq;
 447        void const *sst = task->thread.sve_state;
 448        struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
 449        unsigned int i;
 450
 451        if (!system_supports_sve())
 452                return;
 453
 454        vq = sve_vq_from_vl(task->thread.sve_vl);
 455        for (i = 0; i < 32; ++i)
 456                memcpy(&fst->vregs[i], ZREG(sst, vq, i),
 457                       sizeof(fst->vregs[i]));
 458}
 459
 460#ifdef CONFIG_ARM64_SVE
 461
 462/*
 463 * Return how many bytes of memory are required to store the full SVE
 464 * state for task, given task's currently configured vector length.
 465 */
 466size_t sve_state_size(struct task_struct const *task)
 467{
 468        return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl));
 469}
 470
 471/*
 472 * Ensure that task->thread.sve_state is allocated and sufficiently large.
 473 *
 474 * This function should be used only in preparation for replacing
 475 * task->thread.sve_state with new data.  The memory is always zeroed
 476 * here to prevent stale data from showing through: this is done in
  477 * the interest of testability and predictability, since except in the
  478 * do_sve_acc() case there is no ABI requirement to hide stale data
  479 * written previously by the task.
 480 */
 481void sve_alloc(struct task_struct *task)
 482{
 483        if (task->thread.sve_state) {
  484                memset(task->thread.sve_state, 0, sve_state_size(task));
 485                return;
 486        }
 487
 488        /* This is a small allocation (maximum ~8KB) and Should Not Fail. */
 489        task->thread.sve_state =
 490                kzalloc(sve_state_size(task), GFP_KERNEL);
 491
 492        /*
 493         * If future SVE revisions can have larger vectors though,
 494         * this may cease to be true:
 495         */
 496        BUG_ON(!task->thread.sve_state);
 497}
 498
 499
 500/*
 501 * Ensure that task->thread.sve_state is up to date with respect to
  502 * the user task, irrespective of whether SVE is in use or not.
 503 *
 504 * This should only be called by ptrace.  task must be non-runnable.
 505 * task->thread.sve_state must point to at least sve_state_size(task)
 506 * bytes of allocated kernel memory.
 507 */
 508void fpsimd_sync_to_sve(struct task_struct *task)
 509{
 510        if (!test_tsk_thread_flag(task, TIF_SVE))
 511                fpsimd_to_sve(task);
 512}
 513
 514/*
 515 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
 516 * the user task, irrespective of whether SVE is in use or not.
 517 *
 518 * This should only be called by ptrace.  task must be non-runnable.
 519 * task->thread.sve_state must point to at least sve_state_size(task)
 520 * bytes of allocated kernel memory.
 521 */
 522void sve_sync_to_fpsimd(struct task_struct *task)
 523{
 524        if (test_tsk_thread_flag(task, TIF_SVE))
 525                sve_to_fpsimd(task);
 526}
 527
 528/*
 529 * Ensure that task->thread.sve_state is up to date with respect to
 530 * the task->thread.uw.fpsimd_state.
 531 *
 532 * This should only be called by ptrace to merge new FPSIMD register
 533 * values into a task for which SVE is currently active.
 534 * task must be non-runnable.
 535 * task->thread.sve_state must point to at least sve_state_size(task)
 536 * bytes of allocated kernel memory.
 537 * task->thread.uw.fpsimd_state must already have been initialised with
 538 * the new FPSIMD register values to be merged in.
 539 */
 540void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 541{
 542        unsigned int vq;
 543        void *sst = task->thread.sve_state;
 544        struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 545        unsigned int i;
 546
 547        if (!test_tsk_thread_flag(task, TIF_SVE))
 548                return;
 549
 550        vq = sve_vq_from_vl(task->thread.sve_vl);
 551
 552        memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
 553
 554        for (i = 0; i < 32; ++i)
 555                memcpy(ZREG(sst, vq, i), &fst->vregs[i],
 556                       sizeof(fst->vregs[i]));
 557}
 558
 559int sve_set_vector_length(struct task_struct *task,
 560                          unsigned long vl, unsigned long flags)
 561{
 562        if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
 563                                     PR_SVE_SET_VL_ONEXEC))
 564                return -EINVAL;
 565
 566        if (!sve_vl_valid(vl))
 567                return -EINVAL;
 568
 569        /*
 570         * Clamp to the maximum vector length that VL-agnostic SVE code can
 571         * work with.  A flag may be assigned in the future to allow setting
 572         * of larger vector lengths without confusing older software.
 573         */
 574        if (vl > SVE_VL_ARCH_MAX)
 575                vl = SVE_VL_ARCH_MAX;
 576
 577        vl = find_supported_vector_length(vl);
 578
 579        if (flags & (PR_SVE_VL_INHERIT |
 580                     PR_SVE_SET_VL_ONEXEC))
 581                task->thread.sve_vl_onexec = vl;
 582        else
 583                /* Reset VL to system default on next exec: */
 584                task->thread.sve_vl_onexec = 0;
 585
 586        /* Only actually set the VL if not deferred: */
 587        if (flags & PR_SVE_SET_VL_ONEXEC)
 588                goto out;
 589
 590        if (vl == task->thread.sve_vl)
 591                goto out;
 592
 593        /*
 594         * To ensure the FPSIMD bits of the SVE vector registers are preserved,
 595         * write any live register state back to task_struct, and convert to a
 596         * non-SVE thread.
 597         */
 598        if (task == current) {
 599                local_bh_disable();
 600
 601                task_fpsimd_save();
 602                set_thread_flag(TIF_FOREIGN_FPSTATE);
 603        }
 604
 605        fpsimd_flush_task_state(task);
 606        if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
 607                sve_to_fpsimd(task);
 608
 609        if (task == current)
 610                local_bh_enable();
 611
 612        /*
 613         * Force reallocation of task SVE state to the correct size
 614         * on next use:
 615         */
 616        sve_free(task);
 617
 618        task->thread.sve_vl = vl;
 619
 620out:
 621        if (flags & PR_SVE_VL_INHERIT)
 622                set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
 623        else
 624                clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
 625
 626        return 0;
 627}
 628
 629/*
 630 * Encode the current vector length and flags for return.
 631 * This is only required for prctl(): ptrace has separate fields
 632 *
 633 * flags are as for sve_set_vector_length().
 634 */
 635static int sve_prctl_status(unsigned long flags)
 636{
 637        int ret;
 638
 639        if (flags & PR_SVE_SET_VL_ONEXEC)
 640                ret = current->thread.sve_vl_onexec;
 641        else
 642                ret = current->thread.sve_vl;
 643
 644        if (test_thread_flag(TIF_SVE_VL_INHERIT))
 645                ret |= PR_SVE_VL_INHERIT;
 646
 647        return ret;
 648}
 649
 650/* PR_SVE_SET_VL */
 651int sve_set_current_vl(unsigned long arg)
 652{
 653        unsigned long vl, flags;
 654        int ret;
 655
 656        vl = arg & PR_SVE_VL_LEN_MASK;
 657        flags = arg & ~vl;
 658
 659        if (!system_supports_sve())
 660                return -EINVAL;
 661
 662        ret = sve_set_vector_length(current, vl, flags);
 663        if (ret)
 664                return ret;
 665
 666        return sve_prctl_status(flags);
 667}
 668
 669/* PR_SVE_GET_VL */
 670int sve_get_current_vl(void)
 671{
 672        if (!system_supports_sve())
 673                return -EINVAL;
 674
 675        return sve_prctl_status(0);
 676}
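
     /*
      * Userspace example (illustrative sketch of the prctl() interface
      * built on the two functions above):
      *
      *   #include <sys/prctl.h>
      *
      *   prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT);
      *                   (request a 32-byte VL and keep it across exec)
      *
      *   int vl = prctl(PR_SVE_GET_VL) & PR_SVE_VL_LEN_MASK;
      *                   (the granted VL is in the low bits of the return value)
      *
      * Both calls return the sve_prctl_status() encoding, so the granted VL
      * may be smaller than the VL requested.
      */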
 677
 678/*
 679 * Bitmap for temporary storage of the per-CPU set of supported vector lengths
 680 * during secondary boot.
 681 */
 682static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
 683
 684static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
 685{
 686        unsigned int vq, vl;
 687        unsigned long zcr;
 688
 689        bitmap_zero(map, SVE_VQ_MAX);
 690
 691        zcr = ZCR_ELx_LEN_MASK;
 692        zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;
 693
 694        for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
 695                write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
 696                vl = sve_get_vl();
 697                vq = sve_vq_from_vl(vl); /* skip intervening lengths */
 698                set_bit(vq_to_bit(vq), map);
 699        }
 700}
 701
 702void __init sve_init_vq_map(void)
 703{
 704        sve_probe_vqs(sve_vq_map);
 705}
 706
 707/*
 708 * If we haven't committed to the set of supported VQs yet, filter out
 709 * those not supported by the current CPU.
 710 */
 711void sve_update_vq_map(void)
 712{
 713        sve_probe_vqs(sve_secondary_vq_map);
 714        bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
 715}
 716
 717/* Check whether the current CPU supports all VQs in the committed set */
 718int sve_verify_vq_map(void)
 719{
 720        int ret = 0;
 721
 722        sve_probe_vqs(sve_secondary_vq_map);
 723        bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
 724                      SVE_VQ_MAX);
 725        if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
 726                pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
 727                        smp_processor_id());
 728                ret = -EINVAL;
 729        }
 730
 731        return ret;
 732}
 733
 734static void __init sve_efi_setup(void)
 735{
 736        if (!IS_ENABLED(CONFIG_EFI))
 737                return;
 738
 739        /*
 740         * alloc_percpu() warns and prints a backtrace if this goes wrong.
 741         * This is evidence of a crippled system and we are returning void,
 742         * so no attempt is made to handle this situation here.
 743         */
 744        if (!sve_vl_valid(sve_max_vl))
 745                goto fail;
 746
 747        efi_sve_state = __alloc_percpu(
 748                SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
 749        if (!efi_sve_state)
 750                goto fail;
 751
 752        return;
 753
 754fail:
 755        panic("Cannot allocate percpu memory for EFI SVE save/restore");
 756}
 757
 758/*
 759 * Enable SVE for EL1.
 760 * Intended for use by the cpufeatures code during CPU boot.
 761 */
 762void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
 763{
 764        write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
 765        isb();
 766}
 767
 768void __init sve_setup(void)
 769{
 770        u64 zcr;
 771
 772        if (!system_supports_sve())
 773                return;
 774
 775        /*
 776         * The SVE architecture mandates support for 128-bit vectors,
 777         * so sve_vq_map must have at least SVE_VQ_MIN set.
 778         * If something went wrong, at least try to patch it up:
 779         */
 780        if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
 781                set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
 782
 783        zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
 784        sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
 785
 786        /*
 787         * Sanity-check that the max VL we determined through CPU features
 788         * corresponds properly to sve_vq_map.  If not, do our best:
 789         */
 790        if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
 791                sve_max_vl = find_supported_vector_length(sve_max_vl);
 792
 793        /*
 794         * For the default VL, pick the maximum supported value <= 64.
 795         * VL == 64 is guaranteed not to grow the signal frame.
 796         */
 797        sve_default_vl = find_supported_vector_length(64);
 798
 799        pr_info("SVE: maximum available vector length %u bytes per vector\n",
 800                sve_max_vl);
 801        pr_info("SVE: default vector length %u bytes per vector\n",
 802                sve_default_vl);
 803
 804        sve_efi_setup();
 805}
 806
 807/*
 808 * Called from the put_task_struct() path, which cannot get here
 809 * unless dead_task is really dead and not schedulable.
 810 */
 811void fpsimd_release_task(struct task_struct *dead_task)
 812{
 813        __sve_free(dead_task);
 814}
 815
 816#endif /* CONFIG_ARM64_SVE */
 817
 818/*
 819 * Trapped SVE access
 820 *
 821 * Storage is allocated for the full SVE state, the current FPSIMD
 822 * register contents are migrated across, and TIF_SVE is set so that
 823 * the SVE access trap will be disabled the next time this task
 824 * reaches ret_to_user.
 825 *
 826 * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load()
 827 * would have disabled the SVE access trap for userspace during
 828 * ret_to_user, making an SVE access trap impossible in that case.
 829 */
 830asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 831{
 832        /* Even if we chose not to use SVE, the hardware could still trap: */
 833        if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
 834                force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
 835                return;
 836        }
 837
 838        sve_alloc(current);
 839
 840        local_bh_disable();
 841
 842        task_fpsimd_save();
 843        fpsimd_to_sve(current);
 844
 845        /* Force ret_to_user to reload the registers: */
 846        fpsimd_flush_task_state(current);
 847        set_thread_flag(TIF_FOREIGN_FPSTATE);
 848
 849        if (test_and_set_thread_flag(TIF_SVE))
 850                WARN_ON(1); /* SVE access shouldn't have trapped */
 851
 852        local_bh_enable();
 853}
 854
 855/*
 856 * Trapped FP/ASIMD access.
 857 */
 858asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 859{
 860        /* TODO: implement lazy context saving/restoring */
 861        WARN_ON(1);
 862}
 863
 864/*
 865 * Raise a SIGFPE for the current process.
 866 */
 867asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 868{
 869        siginfo_t info;
 870        unsigned int si_code = FPE_FLTUNK;
 871
 872        if (esr & ESR_ELx_FP_EXC_TFV) {
 873                if (esr & FPEXC_IOF)
 874                        si_code = FPE_FLTINV;
 875                else if (esr & FPEXC_DZF)
 876                        si_code = FPE_FLTDIV;
 877                else if (esr & FPEXC_OFF)
 878                        si_code = FPE_FLTOVF;
 879                else if (esr & FPEXC_UFF)
 880                        si_code = FPE_FLTUND;
 881                else if (esr & FPEXC_IXF)
 882                        si_code = FPE_FLTRES;
 883        }
 884
 885        memset(&info, 0, sizeof(info));
 886        info.si_signo = SIGFPE;
 887        info.si_code = si_code;
 888        info.si_addr = (void __user *)instruction_pointer(regs);
 889
 890        send_sig_info(SIGFPE, &info, current);
 891}
 892
 893void fpsimd_thread_switch(struct task_struct *next)
 894{
 895        if (!system_supports_fpsimd())
 896                return;
 897        /*
 898         * Save the current FPSIMD state to memory, but only if whatever is in
 899         * the registers is in fact the most recent userland FPSIMD state of
 900         * 'current'.
 901         */
 902        if (current->mm)
 903                task_fpsimd_save();
 904
 905        if (next->mm) {
 906                /*
 907                 * If we are switching to a task whose most recent userland
 908                 * FPSIMD state is already in the registers of *this* cpu,
 909                 * we can skip loading the state from memory. Otherwise, set
 910                 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
 911                 * upon the next return to userland.
 912                 */
 913                if (__this_cpu_read(fpsimd_last_state.st) ==
 914                        &next->thread.uw.fpsimd_state
 915                    && next->thread.fpsimd_cpu == smp_processor_id())
 916                        clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 917                else
 918                        set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 919        }
 920}
 921
 922void fpsimd_flush_thread(void)
 923{
 924        int vl, supported_vl;
 925
 926        if (!system_supports_fpsimd())
 927                return;
 928
 929        local_bh_disable();
 930
 931        memset(&current->thread.uw.fpsimd_state, 0,
 932               sizeof(current->thread.uw.fpsimd_state));
 933        fpsimd_flush_task_state(current);
 934
 935        if (system_supports_sve()) {
 936                clear_thread_flag(TIF_SVE);
 937                sve_free(current);
 938
 939                /*
 940                 * Reset the task vector length as required.
 941                 * This is where we ensure that all user tasks have a valid
 942                 * vector length configured: no kernel task can become a user
 943                 * task without an exec and hence a call to this function.
 944                 * By the time the first call to this function is made, all
 945                 * early hardware probing is complete, so sve_default_vl
 946                 * should be valid.
 947                 * If a bug causes this to go wrong, we make some noise and
 948                 * try to fudge thread.sve_vl to a safe value here.
 949                 */
 950                vl = current->thread.sve_vl_onexec ?
 951                        current->thread.sve_vl_onexec : sve_default_vl;
 952
 953                if (WARN_ON(!sve_vl_valid(vl)))
 954                        vl = SVE_VL_MIN;
 955
 956                supported_vl = find_supported_vector_length(vl);
 957                if (WARN_ON(supported_vl != vl))
 958                        vl = supported_vl;
 959
 960                current->thread.sve_vl = vl;
 961
 962                /*
 963                 * If the task is not set to inherit, ensure that the vector
 964                 * length will be reset by a subsequent exec:
 965                 */
 966                if (!test_thread_flag(TIF_SVE_VL_INHERIT))
 967                        current->thread.sve_vl_onexec = 0;
 968        }
 969
 970        set_thread_flag(TIF_FOREIGN_FPSTATE);
 971
 972        local_bh_enable();
 973}
 974
 975/*
 976 * Save the userland FPSIMD state of 'current' to memory, but only if the state
 977 * currently held in the registers does in fact belong to 'current'
 978 */
 979void fpsimd_preserve_current_state(void)
 980{
 981        if (!system_supports_fpsimd())
 982                return;
 983
 984        local_bh_disable();
 985        task_fpsimd_save();
 986        local_bh_enable();
 987}
 988
 989/*
 990 * Like fpsimd_preserve_current_state(), but ensure that
 991 * current->thread.uw.fpsimd_state is updated so that it can be copied to
 992 * the signal frame.
 993 */
 994void fpsimd_signal_preserve_current_state(void)
 995{
 996        fpsimd_preserve_current_state();
 997        if (system_supports_sve() && test_thread_flag(TIF_SVE))
 998                sve_to_fpsimd(current);
 999}
1000
1001/*
1002 * Associate current's FPSIMD context with this cpu
1003 * Preemption must be disabled when calling this function.
1004 */
1005static void fpsimd_bind_to_cpu(void)
1006{
1007        struct fpsimd_last_state_struct *last =
1008                this_cpu_ptr(&fpsimd_last_state);
1009
1010        last->st = &current->thread.uw.fpsimd_state;
1011        last->sve_in_use = test_thread_flag(TIF_SVE);
1012        current->thread.fpsimd_cpu = smp_processor_id();
1013}
1014
1015/*
1016 * Load the userland FPSIMD state of 'current' from memory, but only if the
1017 * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
1018 * state of 'current'
1019 */
1020void fpsimd_restore_current_state(void)
1021{
1022        if (!system_supports_fpsimd())
1023                return;
1024
1025        local_bh_disable();
1026
1027        if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
1028                task_fpsimd_load();
1029                fpsimd_bind_to_cpu();
1030        }
1031
1032        local_bh_enable();
1033}
1034
1035/*
1036 * Load an updated userland FPSIMD state for 'current' from memory and set the
1037 * flag that indicates that the FPSIMD register contents are the most recent
1038 * FPSIMD state of 'current'
1039 */
1040void fpsimd_update_current_state(struct user_fpsimd_state const *state)
1041{
1042        if (!system_supports_fpsimd())
1043                return;
1044
1045        local_bh_disable();
1046
1047        current->thread.uw.fpsimd_state = *state;
1048        if (system_supports_sve() && test_thread_flag(TIF_SVE))
1049                fpsimd_to_sve(current);
1050
1051        task_fpsimd_load();
1052
1053        if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
1054                fpsimd_bind_to_cpu();
1055
1056        local_bh_enable();
1057}
1058
1059/*
1060 * Invalidate live CPU copies of task t's FPSIMD state
1061 */
1062void fpsimd_flush_task_state(struct task_struct *t)
1063{
1064        t->thread.fpsimd_cpu = NR_CPUS;
1065}
1066
1067static inline void fpsimd_flush_cpu_state(void)
1068{
1069        __this_cpu_write(fpsimd_last_state.st, NULL);
1070}
1071
1072/*
1073 * Invalidate any task SVE state currently held in this CPU's regs.
1074 *
1075 * This is used to prevent the kernel from trying to reuse SVE register data
 1076 * that is destroyed by KVM guest enter/exit.  This function should go away when
1077 * KVM SVE support is implemented.  Don't use it for anything else.
1078 */
1079#ifdef CONFIG_ARM64_SVE
1080void sve_flush_cpu_state(void)
1081{
1082        struct fpsimd_last_state_struct const *last =
1083                this_cpu_ptr(&fpsimd_last_state);
1084
1085        if (last->st && last->sve_in_use)
1086                fpsimd_flush_cpu_state();
1087}
1088#endif /* CONFIG_ARM64_SVE */
1089
1090#ifdef CONFIG_KERNEL_MODE_NEON
1091
1092DEFINE_PER_CPU(bool, kernel_neon_busy);
1093EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
1094
1095/*
1096 * Kernel-side NEON support functions
1097 */
1098
1099/*
1100 * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
1101 * context
1102 *
1103 * Must not be called unless may_use_simd() returns true.
1104 * Task context in the FPSIMD registers is saved back to memory as necessary.
1105 *
1106 * A matching call to kernel_neon_end() must be made before returning from the
1107 * calling context.
1108 *
1109 * The caller may freely use the FPSIMD registers until kernel_neon_end() is
1110 * called.
1111 */
1112void kernel_neon_begin(void)
1113{
1114        if (WARN_ON(!system_supports_fpsimd()))
1115                return;
1116
1117        BUG_ON(!may_use_simd());
1118
1119        local_bh_disable();
1120
1121        __this_cpu_write(kernel_neon_busy, true);
1122
1123        /* Save unsaved task fpsimd state, if any: */
1124        if (current->mm) {
1125                task_fpsimd_save();
1126                set_thread_flag(TIF_FOREIGN_FPSTATE);
1127        }
1128
1129        /* Invalidate any task state remaining in the fpsimd regs: */
1130        fpsimd_flush_cpu_state();
1131
1132        preempt_disable();
1133
1134        local_bh_enable();
1135}
1136EXPORT_SYMBOL(kernel_neon_begin);
1137
1138/*
1139 * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
1140 *
1141 * Must be called from a context in which kernel_neon_begin() was previously
1142 * called, with no call to kernel_neon_end() in the meantime.
1143 *
1144 * The caller must not use the FPSIMD registers after this function is called,
1145 * unless kernel_neon_begin() is called again in the meantime.
1146 */
1147void kernel_neon_end(void)
1148{
1149        bool busy;
1150
1151        if (!system_supports_fpsimd())
1152                return;
1153
1154        busy = __this_cpu_xchg(kernel_neon_busy, false);
1155        WARN_ON(!busy); /* No matching kernel_neon_begin()? */
1156
1157        preempt_enable();
1158}
1159EXPORT_SYMBOL(kernel_neon_end);
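
     /*
      * Typical call pattern (illustrative), e.g. in a driver that has a
      * scalar fallback:
      *
      *   if (may_use_simd()) {
      *           kernel_neon_begin();
      *           ... NEON/FPSIMD-accelerated implementation ...
      *           kernel_neon_end();
      *   } else {
      *           ... scalar fallback ...
      *   }
      *
      * may_use_simd() must be checked first, per the kernel_neon_begin()
      * requirements above.
      */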
1160
1161#ifdef CONFIG_EFI
1162
1163static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
1164static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
1165static DEFINE_PER_CPU(bool, efi_sve_state_used);
1166
1167/*
1168 * EFI runtime services support functions
1169 *
1170 * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
1171 * This means that for EFI (and only for EFI), we have to assume that FPSIMD
1172 * is always used rather than being an optional accelerator.
1173 *
1174 * These functions provide the necessary support for ensuring FPSIMD
1175 * save/restore in the contexts from which EFI is used.
1176 *
1177 * Do not use them for any other purpose -- if tempted to do so, you are
1178 * either doing something wrong or you need to propose some refactoring.
1179 */
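
     /*
      * Intended pairing (illustrative; the real call sites are the EFI
      * runtime service wrappers, not this file):
      *
      *   __efi_fpsimd_begin();
      *   ... EFI runtime service call ...
      *   __efi_fpsimd_end();
      *
      * Both calls must run with preemption disabled on the same CPU, since
      * the saved state is kept in the per-cpu variables above.
      */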
1180
1181/*
1182 * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call
1183 */
1184void __efi_fpsimd_begin(void)
1185{
1186        if (!system_supports_fpsimd())
1187                return;
1188
1189        WARN_ON(preemptible());
1190
1191        if (may_use_simd()) {
1192                kernel_neon_begin();
1193        } else {
1194                /*
1195                 * If !efi_sve_state, SVE can't be in use yet and doesn't need
1196                 * preserving:
1197                 */
1198                if (system_supports_sve() && likely(efi_sve_state)) {
1199                        char *sve_state = this_cpu_ptr(efi_sve_state);
1200
1201                        __this_cpu_write(efi_sve_state_used, true);
1202
1203                        sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
1204                                       &this_cpu_ptr(&efi_fpsimd_state)->fpsr);
1205                } else {
1206                        fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
1207                }
1208
1209                __this_cpu_write(efi_fpsimd_state_used, true);
1210        }
1211}
1212
1213/*
1214 * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
1215 */
1216void __efi_fpsimd_end(void)
1217{
1218        if (!system_supports_fpsimd())
1219                return;
1220
1221        if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
1222                kernel_neon_end();
1223        } else {
1224                if (system_supports_sve() &&
1225                    likely(__this_cpu_read(efi_sve_state_used))) {
1226                        char const *sve_state = this_cpu_ptr(efi_sve_state);
1227
1228                        sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
1229                                       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
1230                                       sve_vq_from_vl(sve_get_vl()) - 1);
1231
1232                        __this_cpu_write(efi_sve_state_used, false);
1233                } else {
1234                        fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
1235                }
1236        }
1237}
1238
1239#endif /* CONFIG_EFI */
1240
1241#endif /* CONFIG_KERNEL_MODE_NEON */
1242
1243#ifdef CONFIG_CPU_PM
1244static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
1245                                  unsigned long cmd, void *v)
1246{
1247        switch (cmd) {
1248        case CPU_PM_ENTER:
1249                if (current->mm)
1250                        task_fpsimd_save();
1251                fpsimd_flush_cpu_state();
1252                break;
1253        case CPU_PM_EXIT:
1254                if (current->mm)
1255                        set_thread_flag(TIF_FOREIGN_FPSTATE);
1256                break;
1257        case CPU_PM_ENTER_FAILED:
1258        default:
1259                return NOTIFY_DONE;
1260        }
1261        return NOTIFY_OK;
1262}
1263
1264static struct notifier_block fpsimd_cpu_pm_notifier_block = {
1265        .notifier_call = fpsimd_cpu_pm_notifier,
1266};
1267
1268static void __init fpsimd_pm_init(void)
1269{
1270        cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
1271}
1272
1273#else
1274static inline void fpsimd_pm_init(void) { }
1275#endif /* CONFIG_CPU_PM */
1276
1277#ifdef CONFIG_HOTPLUG_CPU
1278static int fpsimd_cpu_dead(unsigned int cpu)
1279{
1280        per_cpu(fpsimd_last_state.st, cpu) = NULL;
1281        return 0;
1282}
1283
1284static inline void fpsimd_hotplug_init(void)
1285{
1286        cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
1287                                  NULL, fpsimd_cpu_dead);
1288}
1289
1290#else
1291static inline void fpsimd_hotplug_init(void) { }
1292#endif
1293
1294/*
1295 * FP/SIMD support code initialisation.
1296 */
1297static int __init fpsimd_init(void)
1298{
1299        if (elf_hwcap & HWCAP_FP) {
1300                fpsimd_pm_init();
1301                fpsimd_hotplug_init();
1302        } else {
1303                pr_notice("Floating-point is not implemented\n");
1304        }
1305
1306        if (!(elf_hwcap & HWCAP_ASIMD))
1307                pr_notice("Advanced SIMD is not implemented\n");
1308
1309        return sve_sysctl_init();
1310}
1311core_initcall(fpsimd_init);
1312