/*
 * FP/SIMD context switching and fault handling.
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/swab.h>

#include <asm/esr.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
#include <asm/virt.h>

#define FPEXC_IOF	(1 << 0)
#define FPEXC_DZF	(1 << 1)
#define FPEXC_OFF	(1 << 2)
#define FPEXC_UFF	(1 << 3)
#define FPEXC_IXF	(1 << 4)
#define FPEXC_IDF	(1 << 7)
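
/*
 * In order to reduce the number of times the FPSIMD state is needlessly
 * saved and restored, we track two things:
 * (a) for each task, which CPU most recently held the task's FPSIMD state
 *     in its registers (thread.fpsimd_cpu);
 * (b) for each CPU, which task's FPSIMD state was most recently loaded
 *     into its registers (the per-CPU fpsimd_last_state below).
 *
 * On context switch we then merely set TIF_FOREIGN_FPSTATE when these
 * two views disagree, and defer the actual register reload until the
 * task returns to userspace (see fpsimd_restore_current_state()).
 */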
struct fpsimd_last_state_struct {
	struct user_fpsimd_state *st;
	void *sve_state;
	unsigned int sve_vl;
};

static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
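
/* Default VL for tasks that don't set it explicitly: */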
static int sve_default_vl = -1;

#ifdef CONFIG_ARM64_SVE
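
/* Maximum supported vector length across all CPUs (initially poisoned) */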
int __ro_after_init sve_max_vl = SVE_VL_MIN;
int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
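
/*
 * Set of available vector lengths, where length vq is encoded as bit
 * __vq_to_bit(vq):
 */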
__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);

static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);

static void __percpu *efi_sve_state;

#else /* !CONFIG_ARM64_SVE */

extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
extern void __percpu *efi_sve_state;

#endif /* !CONFIG_ARM64_SVE */

DEFINE_PER_CPU(bool, fpsimd_context_busy);
EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);

static void __get_cpu_fpsimd_context(void)
{
	bool busy = __this_cpu_xchg(fpsimd_context_busy, true);

	WARN_ON(busy);
}
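
/*
 * Claim ownership of the CPU FPSIMD context for use by the calling context.
 *
 * The caller may freely manipulate the FPSIMD context metadata until
 * put_cpu_fpsimd_context() is called.
 *
 * The double-underscore version must only be called if you know the task
 * can't be preempted.
 */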
static void get_cpu_fpsimd_context(void)
{
	preempt_disable();
	__get_cpu_fpsimd_context();
}

static void __put_cpu_fpsimd_context(void)
{
	bool busy = __this_cpu_xchg(fpsimd_context_busy, false);

	WARN_ON(!busy);
}
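
/*
 * Release the CPU FPSIMD context.
 *
 * Must be called from a context in which get_cpu_fpsimd_context() was
 * previously called, with no call to put_cpu_fpsimd_context() in the
 * meantime.
 */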
static void put_cpu_fpsimd_context(void)
{
	__put_cpu_fpsimd_context();
	preempt_enable();
}

static bool have_cpu_fpsimd_context(void)
{
	return !preemptible() && __this_cpu_read(fpsimd_context_busy);
}
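
/*
 * Call __sve_free() directly only if you know task can't be scheduled
 * or preempted.
 */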
static void __sve_free(struct task_struct *task)
{
	kfree(task->thread.sve_state);
	task->thread.sve_state = NULL;
}

static void sve_free(struct task_struct *task)
{
	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));

	__sve_free(task);
}
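
/*
 * TIF_SVE controls whether a task can use SVE without trapping while
 * in userspace, and also the way a task's FPSIMD/SVE state is stored
 * in thread_struct.  When TIF_SVE is set, the full SVE register state
 * is kept in task->thread.sve_state and userspace can use SVE without
 * trapping; when it is clear, only task->thread.uw.fpsimd_state is
 * valid, and any userspace SVE access traps to do_sve_acc(), which
 * allocates the SVE storage and sets the flag.
 */

/*
 * Update current's FPSIMD/SVE registers from thread_struct.
 *
 * This function should be called only when the FPSIMD/SVE state in
 * thread_struct is known to be up to date, when preparing to enter
 * userspace.
 */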
static void task_fpsimd_load(void)
{
	WARN_ON(!have_cpu_fpsimd_context());

	if (system_supports_sve() && test_thread_flag(TIF_SVE))
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       sve_vq_from_vl(current->thread.sve_vl) - 1);
	else
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
}
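
/*
 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
 * date with respect to the CPU registers.
 */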
static void fpsimd_save(void)
{
	struct fpsimd_last_state_struct const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */

	WARN_ON(!have_cpu_fpsimd_context());

	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
			if (WARN_ON(sve_get_vl() != last->sve_vl)) {
				/*
				 * Can't save the user regs, so current would
				 * re-enter user with corrupt state.
				 * There's no way to recover, so kill it:
				 */
				force_signal_inject(SIGKILL, SI_KERNEL, 0);
				return;
			}

			sve_save_state((char *)last->sve_state +
					       sve_ffr_offset(last->sve_vl),
				       &last->st->fpsr);
		} else
			fpsimd_save_state(last->st);
	}
}
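
/*
 * All vector length selection from userspace comes through here.
 * We're on a slow path, so some sanity-checks are included.
 * If things go wrong there's a bug somewhere, but try to fall back to a
 * safe choice.
 */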
static unsigned int find_supported_vector_length(unsigned int vl)
{
	int bit;
	int max_vl = sve_max_vl;

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = SVE_VL_MIN;

	if (WARN_ON(!sve_vl_valid(max_vl)))
		max_vl = SVE_VL_MIN;

	if (vl > max_vl)
		vl = max_vl;

	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
			    __vq_to_bit(sve_vq_from_vl(vl)));
	return sve_vl_from_vq(__bit_to_vq(bit));
}

#ifdef CONFIG_SYSCTL
static int sve_proc_do_default_vl(struct ctl_table *table, int write,
				  void __user *buffer, size_t *lenp,
				  loff_t *ppos)
{
	int ret;
	int vl = sve_default_vl;
	struct ctl_table tmp_table = {
		.data = &vl,
		.maxlen = sizeof(vl),
	};

	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Writing -1 has the special meaning "set to max": */
	if (vl == -1)
		vl = sve_max_vl;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	sve_default_vl = find_supported_vector_length(vl);
	return 0;
}
366
367static struct ctl_table sve_default_vl_table[] = {
368 {
369 .procname = "sve_default_vector_length",
370 .mode = 0644,
371 .proc_handler = sve_proc_do_default_vl,
372 },
373 { }
374};
375
376static int __init sve_sysctl_init(void)
377{
378 if (system_supports_sve())
379 if (!register_sysctl("abi", sve_default_vl_table))
380 return -EINVAL;
381
382 return 0;
383}
384
385#else
386static int __init sve_sysctl_init(void) { return 0; }
387#endif
388
389#define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
390 (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
391
392#ifdef CONFIG_CPU_BIG_ENDIAN
393static __uint128_t arm64_cpu_to_le128(__uint128_t x)
394{
395 u64 a = swab64(x);
396 u64 b = swab64(x >> 64);
397
398 return ((__uint128_t)a << 64) | b;
399}
400#else
401static __uint128_t arm64_cpu_to_le128(__uint128_t x)
402{
403 return x;
404}
405#endif
406
407#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
408
409static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
410 unsigned int vq)
411{
412 unsigned int i;
413 __uint128_t *p;
414
415 for (i = 0; i < SVE_NUM_ZREGS; ++i) {
416 p = (__uint128_t *)ZREG(sst, vq, i);
417 *p = arm64_cpu_to_le128(fst->vregs[i]);
418 }
419}
420
421
422
423
424
425
426
427
428
429
430
431
432
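
/*
 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
 * task->thread.sve_state.
 *
 * Task can be a non-runnable task, or current.  In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 */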
static void fpsimd_to_sve(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (!system_supports_sve())
		return;

	vq = sve_vq_from_vl(task->thread.sve_vl);
	__fpsimd_to_sve(sst, fst, vq);
}
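
/*
 * Transfer the SVE state in task->thread.sve_state to
 * task->thread.uw.fpsimd_state.
 *
 * Task can be a non-runnable task, or current.  In the latter case,
 * the caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.sve_state must be up to date before calling this function.
 */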
static void sve_to_fpsimd(struct task_struct *task)
{
	unsigned int vq;
	void const *sst = task->thread.sve_state;
	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t const *p;

	if (!system_supports_sve())
		return;

	vq = sve_vq_from_vl(task->thread.sve_vl);
	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
		p = (__uint128_t const *)ZREG(sst, vq, i);
		fst->vregs[i] = arm64_le128_to_cpu(*p);
	}
}

#ifdef CONFIG_ARM64_SVE
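
/*
 * Return how many bytes of memory are required to store the full SVE
 * state for task, given task's currently configured vector length.
 */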
size_t sve_state_size(struct task_struct const *task)
{
	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl));
}
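
/*
 * Ensure that task->thread.sve_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sve_state with new data.  The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability.
 */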
void sve_alloc(struct task_struct *task)
{
	if (task->thread.sve_state) {
		memset(task->thread.sve_state, 0, sve_state_size(task));
		return;
	}

	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
	task->thread.sve_state =
		kzalloc(sve_state_size(task), GFP_KERNEL);

	/*
	 * If future SVE revisions can have larger vectors though,
	 * this may cease to be true:
	 */
	BUG_ON(!task->thread.sve_state);
}
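
/*
 * Ensure that task->thread.sve_state is up to date with respect to
 * the user task, irrespective of when SVE is in use or not.
 *
 * This should only be called by ptrace.  task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 */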
void fpsimd_sync_to_sve(struct task_struct *task)
{
	if (!test_tsk_thread_flag(task, TIF_SVE))
		fpsimd_to_sve(task);
}
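
/*
 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
 * the user task, irrespective of whether SVE is in use or not.
 *
 * This should only be called by ptrace.  task must be non-runnable.
 */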
void sve_sync_to_fpsimd(struct task_struct *task)
{
	if (test_tsk_thread_flag(task, TIF_SVE))
		sve_to_fpsimd(task);
}
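
/*
 * Ensure that task->thread.sve_state is up to date with respect to
 * the task->thread.uw.fpsimd_state.
 *
 * This should only be called by ptrace to merge new FPSIMD register
 * values into a task for which SVE is currently active.
 * task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must already have been initialised with
 * the new FPSIMD register values to be merged in.
 */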
void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;

	if (!test_tsk_thread_flag(task, TIF_SVE))
		return;

	vq = sve_vq_from_vl(task->thread.sve_vl);

	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
	__fpsimd_to_sve(sst, fst, vq);
}
int sve_set_vector_length(struct task_struct *task,
			  unsigned long vl, unsigned long flags)
{
	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
				     PR_SVE_SET_VL_ONEXEC))
		return -EINVAL;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/*
	 * Clamp to the maximum vector length that VL-agnostic SVE code can
	 * work with.  A flag may be added in the future to allow setting
	 * of larger vector lengths without confusing older software.
	 */
	if (vl > SVE_VL_ARCH_MAX)
		vl = SVE_VL_ARCH_MAX;

	vl = find_supported_vector_length(vl);

	if (flags & (PR_SVE_VL_INHERIT |
		     PR_SVE_SET_VL_ONEXEC))
		task->thread.sve_vl_onexec = vl;
	else
		/* Reset VL to system default on next exec: */
		task->thread.sve_vl_onexec = 0;

	/* Only actually set the VL if not deferred: */
	if (flags & PR_SVE_SET_VL_ONEXEC)
		goto out;

	if (vl == task->thread.sve_vl)
		goto out;

	/*
	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
	 * write any live register state back to task_struct, and convert to a
	 * regular FPSIMD thread.
	 */
	if (task == current) {
		get_cpu_fpsimd_context();

		fpsimd_save();
	}

	fpsimd_flush_task_state(task);
	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
		sve_to_fpsimd(task);

	if (task == current)
		put_cpu_fpsimd_context();

	/*
	 * Force reallocation of task SVE state to the correct size
	 * on next use:
	 */
	sve_free(task);

	task->thread.sve_vl = vl;

out:
	update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
			       flags & PR_SVE_VL_INHERIT);

	return 0;
}
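
/*
 * Encode the current vector length and flags for return.
 * This is only required for prctl(): ptrace has separate fields.
 *
 * flags are as for sve_set_vector_length().
 */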
static int sve_prctl_status(unsigned long flags)
{
	int ret;

	if (flags & PR_SVE_SET_VL_ONEXEC)
		ret = current->thread.sve_vl_onexec;
	else
		ret = current->thread.sve_vl;

	if (test_thread_flag(TIF_SVE_VL_INHERIT))
		ret |= PR_SVE_VL_INHERIT;

	return ret;
}
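
/* PR_SVE_SET_VL: set the current task's vector length */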
int sve_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SVE_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sve())
		return -EINVAL;

	ret = sve_set_vector_length(current, vl, flags);
	if (ret)
		return ret;

	return sve_prctl_status(flags);
}
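
/* PR_SVE_GET_VL: get the current task's vector length */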
int sve_get_current_vl(void)
{
	if (!system_supports_sve())
		return -EINVAL;

	return sve_prctl_status(0);
}

static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
	unsigned int vq, vl;
	unsigned long zcr;

	bitmap_zero(map, SVE_VQ_MAX);

	zcr = ZCR_ELx_LEN_MASK;
	zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr;

	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
		write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1);
		vl = sve_get_vl();
		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
		set_bit(__vq_to_bit(vq), map);
	}
}
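
/*
 * Initialise the set of known supported VQs for the boot CPU.
 * This is called during kernel boot, before secondary CPUs are brought up.
 */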
void __init sve_init_vq_map(void)
{
	sve_probe_vqs(sve_vq_map);
	bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
}
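
/*
 * If we haven't committed to the set of supported VQs yet, filter out
 * those not supported by the current CPU.
 * This function is called during the bring-up of early secondary CPUs only.
 */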
void sve_update_vq_map(void)
{
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);

	sve_probe_vqs(tmp_map);
	bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
	bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
}
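
/*
 * Check whether the current CPU supports all VQs in the committed set.
 * This function is called during the bring-up of late secondary CPUs only.
 */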
int sve_verify_vq_map(void)
{
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	sve_probe_vqs(tmp_map);

	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
		pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
			smp_processor_id());
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
		return 0;

	/*
	 * For KVM, it is necessary to ensure that this CPU doesn't
	 * support any vector length that guests may have probed as
	 * unsupported.
	 */

	/* Recover the set of supported VQs: */
	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	/* Find VQs supported that are not globally supported: */
	bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);

	/* Find the lowest such VQ, if any: */
	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		return 0; /* no mismatches */

	/*
	 * Mismatches above sve_max_virtualisable_vl are fine, since
	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
	 */
	if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) {
		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
			smp_processor_id());
		return -EINVAL;
	}

	return 0;
}
static void __init sve_efi_setup(void)
{
	if (!IS_ENABLED(CONFIG_EFI))
		return;

	/*
	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
	 * This is evidence of a crippled system and we are returning void,
	 * so no attempt is made to handle this situation here.
	 */
	if (!sve_vl_valid(sve_max_vl))
		goto fail;

	efi_sve_state = __alloc_percpu(
		SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
	if (!efi_sve_state)
		goto fail;

	return;

fail:
	panic("Cannot allocate percpu memory for EFI SVE save/restore");
}
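
/*
 * Enable SVE for EL1.
 * Intended for use by the cpufeatures code during CPU boot.
 */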
void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
	isb();
}
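
/*
 * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
 * vector length.
 *
 * Use only if SVE is present.
 * This function clobbers the SVE vector length.
 */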
u64 read_zcr_features(void)
{
	u64 zcr;
	unsigned int vq_max;

	/*
	 * Set the maximum possible VL, and write zeroes to all other
	 * bits to see if they stick.
	 */
	sve_kernel_enable(NULL);
	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);

	zcr = read_sysreg_s(SYS_ZCR_EL1);
	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
	vq_max = sve_vq_from_vl(sve_get_vl());
	zcr |= vq_max - 1; /* set LEN field to maximum effective value */

	return zcr;
}
void __init sve_setup(void)
{
	u64 zcr;
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	if (!system_supports_sve())
		return;

	/*
	 * The SVE architecture mandates support for 128-bit vectors,
	 * so sve_vq_map must have at least SVE_VQ_MIN set.
	 * If something went wrong, at least try to patch it up:
	 */
	if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
		set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map);

	zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
	sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);

	/*
	 * Sanity-check that the max VL we determined through CPU features
	 * corresponds properly to sve_vq_map.  If not, do our best:
	 */
	if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
		sve_max_vl = find_supported_vector_length(sve_max_vl);

	/*
	 * For the default VL, pick the maximum supported value <= 64.
	 * VL == 64 is guaranteed not to grow the signal frame.
	 */
	sve_default_vl = find_supported_vector_length(64);

	bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
		      SVE_VQ_MAX);

	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		/* No non-virtualisable VLs found */
		sve_max_virtualisable_vl = SVE_VQ_MAX;
	else if (WARN_ON(b == SVE_VQ_MAX - 1))
		/* No virtualisable VLs?  This is architecturally forbidden. */
		sve_max_virtualisable_vl = SVE_VQ_MIN;
	else /* b + 1 < SVE_VQ_MAX */
		sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));

	if (sve_max_virtualisable_vl > sve_max_vl)
		sve_max_virtualisable_vl = sve_max_vl;

	pr_info("SVE: maximum available vector length %u bytes per vector\n",
		sve_max_vl);
	pr_info("SVE: default vector length %u bytes per vector\n",
		sve_default_vl);

	/* KVM decides whether to support mismatched systems. Just warn here: */
	if (sve_max_virtualisable_vl < sve_max_vl)
		pr_warn("SVE: unvirtualisable vector lengths present\n");

	sve_efi_setup();
}
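
/*
 * Called from the put_task_struct() path, which cannot get here
 * unless dead_task is really dead and not schedulable.
 */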
void fpsimd_release_task(struct task_struct *dead_task)
{
	__sve_free(dead_task);
}

#endif /* CONFIG_ARM64_SVE */
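
/*
 * Trapped SVE access
 *
 * Storage is allocated for the full SVE state, the current FPSIMD
 * register contents are migrated across, and TIF_SVE is set so that
 * the SVE access trap will be disabled the next time this task
 * reaches ret_to_user.
 *
 * TIF_SVE should be clear on entry: otherwise,
 * fpsimd_restore_current_state() would have disabled the SVE access
 * trap for userspace during ret_to_user, making an SVE access trap
 * impossible in that case.
 */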
asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SVE, the hardware could still trap: */
	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
		return;
	}

	sve_alloc(current);

	get_cpu_fpsimd_context();

	fpsimd_save();

	/* Force ret_to_user to reload the registers: */
	fpsimd_flush_task_state(current);

	fpsimd_to_sve(current);
	if (test_and_set_thread_flag(TIF_SVE))
		WARN_ON(1); /* SVE access shouldn't have trapped */

	put_cpu_fpsimd_context();
}
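
/*
 * Trapped FP/ASIMD access.
 */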
asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
	/* TODO: implement lazy context saving/restoring */
	WARN_ON(1);
}
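
/*
 * Raise a SIGFPE for the current process.
 */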
asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
	unsigned int si_code = FPE_FLTUNK;

	if (esr & ESR_ELx_FP_EXC_TFV) {
		if (esr & FPEXC_IOF)
			si_code = FPE_FLTINV;
		else if (esr & FPEXC_DZF)
			si_code = FPE_FLTDIV;
		else if (esr & FPEXC_OFF)
			si_code = FPE_FLTOVF;
		else if (esr & FPEXC_UFF)
			si_code = FPE_FLTUND;
		else if (esr & FPEXC_IXF)
			si_code = FPE_FLTRES;
	}

	send_sig_fault(SIGFPE, si_code,
		       (void __user *)instruction_pointer(regs),
		       current);
}
void fpsimd_thread_switch(struct task_struct *next)
{
	bool wrong_task, wrong_cpu;

	if (!system_supports_fpsimd())
		return;

	__get_cpu_fpsimd_context();

	/* Save unsaved fpsimd state, if any: */
	fpsimd_save();

	/*
	 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
	 * state.  For kernel threads, FPSIMD registers are never loaded
	 * and wrong_task and wrong_cpu will always be true.
	 */
	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
					&next->thread.uw.fpsimd_state;
	wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();

	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
			       wrong_task || wrong_cpu);

	__put_cpu_fpsimd_context();
}
void fpsimd_flush_thread(void)
{
	int vl, supported_vl;

	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();

	fpsimd_flush_task_state(current);
	memset(&current->thread.uw.fpsimd_state, 0,
	       sizeof(current->thread.uw.fpsimd_state));

	if (system_supports_sve()) {
		clear_thread_flag(TIF_SVE);
		sve_free(current);

		/*
		 * Reset the task vector length as required.
		 * This is where we ensure that all user tasks have a valid
		 * vector length configured: no kernel task can become a user
		 * task without an exec and hence a call to this function.
		 * By the time the first call to this function is made, all
		 * early hardware probing is complete, so sve_default_vl
		 * should be valid.
		 * If a bug causes this to go wrong, we make some noise and
		 * try to fudge thread.sve_vl to a safe value here.
		 */
		vl = current->thread.sve_vl_onexec ?
			current->thread.sve_vl_onexec : sve_default_vl;

		if (WARN_ON(!sve_vl_valid(vl)))
			vl = SVE_VL_MIN;

		supported_vl = find_supported_vector_length(vl);
		if (WARN_ON(supported_vl != vl))
			vl = supported_vl;

		current->thread.sve_vl = vl;

		/*
		 * If the task is not set to inherit, ensure that the vector
		 * length will be reset by a subsequent exec:
		 */
		if (!test_thread_flag(TIF_SVE_VL_INHERIT))
			current->thread.sve_vl_onexec = 0;
	}

	put_cpu_fpsimd_context();
}
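
/*
 * Save the userland FPSIMD state of 'current' to memory, but only if the
 * state currently held in the registers does in fact belong to 'current'.
 */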
void fpsimd_preserve_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();
	fpsimd_save();
	put_cpu_fpsimd_context();
}
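
/*
 * Like fpsimd_preserve_current_state(), but ensure that
 * current->thread.uw.fpsimd_state is updated so that it can be copied to
 * the signal frame.
 */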
void fpsimd_signal_preserve_current_state(void)
{
	fpsimd_preserve_current_state();
	if (system_supports_sve() && test_thread_flag(TIF_SVE))
		sve_to_fpsimd(current);
}
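
/*
 * Associate current's FPSIMD context with this cpu.
 * The caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 */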
void fpsimd_bind_task_to_cpu(void)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);

	last->st = &current->thread.uw.fpsimd_state;
	last->sve_state = current->thread.sve_state;
	last->sve_vl = current->thread.sve_vl;
	current->thread.fpsimd_cpu = smp_processor_id();

	if (system_supports_sve()) {
		/* Toggle SVE trapping for userspace if needed */
		if (test_thread_flag(TIF_SVE))
			sve_user_enable();
		else
			sve_user_disable();

		/* Serialised by exception return to user */
	}
}
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
			      unsigned int sve_vl)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!in_softirq() && !irqs_disabled());

	last->st = st;
	last->sve_state = sve_state;
	last->sve_vl = sve_vl;
}
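
/*
 * Load the userland FPSIMD state of 'current' from memory, but only if the
 * FPSIMD state already held in the registers is the most recent userland
 * FPSIMD state of 'current'.
 */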
void fpsimd_restore_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();

	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		task_fpsimd_load();
		fpsimd_bind_task_to_cpu();
	}

	put_cpu_fpsimd_context();
}
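
/*
 * Load an updated userland FPSIMD state for 'current' from memory and set
 * the flag that indicates the FPSIMD register contents are the most recent
 * FPSIMD state of 'current'.
 */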
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();

	current->thread.uw.fpsimd_state = *state;
	if (system_supports_sve() && test_thread_flag(TIF_SVE))
		fpsimd_to_sve(current);

	task_fpsimd_load();
	fpsimd_bind_task_to_cpu();

	clear_thread_flag(TIF_FOREIGN_FPSTATE);

	put_cpu_fpsimd_context();
}
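
/*
 * Invalidate live CPU copies of task t's FPSIMD state.
 *
 * This function may be called with preemption enabled.  The barrier()
 * ensures that the assignment to fpsimd_cpu is visible to any
 * preemption/softirq that could race with set_tsk_thread_flag(), so
 * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
 *
 * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
 * subsequent code.
 */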
void fpsimd_flush_task_state(struct task_struct *t)
{
	t->thread.fpsimd_cpu = NR_CPUS;

	barrier();
	set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);

	barrier();
}
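
/*
 * Invalidate any task's FPSIMD state that is present on this cpu.
 * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
 * before calling this function.
 */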
static void fpsimd_flush_cpu_state(void)
{
	__this_cpu_write(fpsimd_last_state.st, NULL);
	set_thread_flag(TIF_FOREIGN_FPSTATE);
}
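
/*
 * Save the FPSIMD state to memory and invalidate cpu view.
 * This function must be called with preemption disabled.
 */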
void fpsimd_save_and_flush_cpu_state(void)
{
	WARN_ON(preemptible());
	__get_cpu_fpsimd_context();
	fpsimd_save();
	fpsimd_flush_cpu_state();
	__put_cpu_fpsimd_context();
}

#ifdef CONFIG_KERNEL_MODE_NEON
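
/*
 * Kernel-side NEON support functions
 */

/*
 * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the
 * calling context
 *
 * Must not be called unless may_use_simd() returns true.
 * Task context in the FPSIMD registers is saved back to memory as necessary.
 *
 * A matching call to kernel_neon_end() must be made before returning from
 * the calling context.
 *
 * The caller may freely use the FPSIMD registers until kernel_neon_end()
 * is called.
 */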
void kernel_neon_begin(void)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	BUG_ON(!may_use_simd());

	get_cpu_fpsimd_context();

	/* Save unsaved fpsimd state, if any: */
	fpsimd_save();

	/* Invalidate any task state remaining in the fpsimd regs: */
	fpsimd_flush_cpu_state();
}
EXPORT_SYMBOL(kernel_neon_begin);
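
/*
 * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
 *
 * Must be called from a context in which kernel_neon_begin() was previously
 * called, with no call to kernel_neon_end() in the meantime.
 *
 * The caller must not use the FPSIMD registers after this function is
 * called, unless kernel_neon_begin() is called again in the meantime.
 */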
void kernel_neon_end(void)
{
	if (!system_supports_fpsimd())
		return;

	put_cpu_fpsimd_context();
}
EXPORT_SYMBOL(kernel_neon_end);

#ifdef CONFIG_EFI

static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
static DEFINE_PER_CPU(bool, efi_sve_state_used);
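
/*
 * EFI runtime services support functions
 *
 * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
 * This means that for EFI (and only for EFI), we have to assume that FPSIMD
 * is always used rather than being an optional accelerator.
 *
 * These functions provide the necessary support for ensuring FPSIMD
 * save/restore in the contexts from which EFI is used.
 *
 * Do not use them for any other purpose -- if tempted to do so, you are
 * either doing something wrong or you need to propose some refactoring.
 */

/*
 * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services
 * call
 */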
void __efi_fpsimd_begin(void)
{
	if (!system_supports_fpsimd())
		return;

	WARN_ON(preemptible());

	if (may_use_simd()) {
		kernel_neon_begin();
	} else {
		/*
		 * If !efi_sve_state, SVE can't be in use yet and doesn't
		 * need preserving:
		 */
		if (system_supports_sve() && likely(efi_sve_state)) {
			char *sve_state = this_cpu_ptr(efi_sve_state);

			__this_cpu_write(efi_sve_state_used, true);

			sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr);
		} else {
			fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
		}

		__this_cpu_write(efi_fpsimd_state_used, true);
	}
}
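
/*
 * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call
 */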
void __efi_fpsimd_end(void)
{
	if (!system_supports_fpsimd())
		return;

	if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
		kernel_neon_end();
	} else {
		if (system_supports_sve() &&
		    likely(__this_cpu_read(efi_sve_state_used))) {
			char const *sve_state = this_cpu_ptr(efi_sve_state);

			sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
				       sve_vq_from_vl(sve_get_vl()) - 1);

			__this_cpu_write(efi_sve_state_used, false);
		} else {
			fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
		}
	}
}

#endif /* CONFIG_EFI */

#endif /* CONFIG_KERNEL_MODE_NEON */
#ifdef CONFIG_CPU_PM
static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
				  unsigned long cmd, void *v)
{
	switch (cmd) {
	case CPU_PM_ENTER:
		fpsimd_save_and_flush_cpu_state();
		break;
	case CPU_PM_EXIT:
		break;
	case CPU_PM_ENTER_FAILED:
	default:
		return NOTIFY_DONE;
	}
	return NOTIFY_OK;
}

static struct notifier_block fpsimd_cpu_pm_notifier_block = {
	.notifier_call = fpsimd_cpu_pm_notifier,
};

static void __init fpsimd_pm_init(void)
{
	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
}

#else
static inline void fpsimd_pm_init(void) { }
#endif /* CONFIG_CPU_PM */

#ifdef CONFIG_HOTPLUG_CPU
static int fpsimd_cpu_dead(unsigned int cpu)
{
	per_cpu(fpsimd_last_state.st, cpu) = NULL;
	return 0;
}

static inline void fpsimd_hotplug_init(void)
{
	cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
				  NULL, fpsimd_cpu_dead);
}

#else
static inline void fpsimd_hotplug_init(void) { }
#endif
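
/*
 * FP/SIMD support code initialisation.
 */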
static int __init fpsimd_init(void)
{
	if (cpu_have_named_feature(FP)) {
		fpsimd_pm_init();
		fpsimd_hotplug_init();
	} else {
		pr_notice("Floating-point is not implemented\n");
	}

	if (!cpu_have_named_feature(ASIMD))
		pr_notice("Advanced SIMD is not implemented\n");

	return sve_sysctl_init();
}
core_initcall(fpsimd_init);