// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * FP/SIMD context switching and fault handling
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/swab.h>

#include <asm/esr.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
#include <asm/virt.h>

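/*
 * Syndrome bits for a trapped floating-point exception, as reported in
 * ESR_ELx.ISS when ESR_ELx.TFV is set: invalid operation, divide by zero,
 * overflow, underflow, inexact, and input denormal.
 */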
#define FPEXC_IOF	(1 << 0)
#define FPEXC_DZF	(1 << 1)
#define FPEXC_OFF	(1 << 2)
#define FPEXC_UFF	(1 << 3)
#define FPEXC_IXF	(1 << 4)
#define FPEXC_IDF	(1 << 7)

/*
 * In order to reduce the number of times the FPSIMD state is needlessly saved
 * and restored, we need to keep track of two things:
 * (a) for each task, we need to remember which CPU was the last one to have
 *     the task's FPSIMD state loaded into its FPSIMD registers;
 * (b) for each CPU, we need to remember which task's userland FPSIMD state
 *     has been loaded into its FPSIMD registers most recently, or whether it
 *     has been used to perform kernel mode NEON in the meantime.
 *
 * For (a), we add a 'cpu' field to struct thread_struct, which gets updated
 * to the id of the current CPU every time the state is loaded onto a CPU.
 * For (b), we use the per-cpu variable 'fpsimd_last_state' (below), which
 * contains the address of the userland FPSIMD state of the task that was
 * loaded onto the CPU most recently, or NULL if kernel mode NEON has been
 * performed after that.
 *
 * With this in place, we no longer have to restore the next FPSIMD state
 * right when switching between tasks.  Instead, we can defer this check to
 * userland resume, at which time we verify whether the CPU's fpsimd_last_state
 * and the task's fpsimd_cpu are still mutually in sync.  If this is the case,
 * we can omit the FPSIMD restore.
 *
 * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
 * indicate whether or not the userland FPSIMD state of the current task is
 * present in the registers.  The flag is set unless the FPSIMD registers of
 * this CPU currently contain the most recent userland FPSIMD state of the
 * current task.
 */
struct fpsimd_last_state_struct {
	struct user_fpsimd_state *st;
	void *sve_state;
	unsigned int sve_vl;
};

static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);

/* Default VL for tasks that don't set it explicitly: */
static int sve_default_vl = -1;

#ifdef CONFIG_ARM64_SVE

/* Maximum supported vector length across all CPUs (initially poisoned) */
int __ro_after_init sve_max_vl = SVE_VL_MIN;
int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;

/*
 * Set of available vector lengths, where length vq is encoded as bit
 * __vq_to_bit(vq):
 */
__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
/* Set of vector lengths present on at least one cpu: */
static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);

static void __percpu *efi_sve_state;

#else /* ! CONFIG_ARM64_SVE */

/* Dummy declarations for code that will never run: */
extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
extern void __percpu *efi_sve_state;

#endif /* ! CONFIG_ARM64_SVE */

/*
 * Call __sve_free() directly only if you know task can't be scheduled
 * or preempted.
 */
static void __sve_free(struct task_struct *task)
{
	kfree(task->thread.sve_state);
	task->thread.sve_state = NULL;
}

static void sve_free(struct task_struct *task)
{
	WARN_ON(test_tsk_thread_flag(task, TIF_SVE));

	__sve_free(task);
}

/*
 * TIF_SVE controls whether a task can use SVE without trapping while
 * in userspace, and also the way a task's FPSIMD/SVE state is stored
 * in thread_struct.
 *
 * The kernel uses this flag to track whether a user task is actively
 * using SVE, and therefore whether full SVE register state needs to
 * be tracked.  If not, the cheaper FPSIMD context handling code can
 * be used instead of the more costly SVE equivalents.
 *
 *  * TIF_SVE set:
 *
 *    The task can execute SVE instructions while in userspace without
 *    trapping to the kernel.
 *
 *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] of the
 *    corresponding Zn), P0-P15 and FFR are encoded in
 *    task->thread.sve_state, formatted appropriately for vector
 *    length task->thread.sve_vl.  task->thread.sve_state must point to
 *    a valid buffer at least sve_state_size(task) bytes in size.
 *
 *  * TIF_SVE clear:
 *
 *    An attempt by the user task to execute an SVE instruction causes
 *    do_sve_acc() to be called, which does some preparation and then
 *    sets TIF_SVE.
 *
 *    When stored, FPSIMD registers V0-V31 are encoded in
 *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31
 *    are logically zero but not stored anywhere; P0-P15 and FFR are not
 *    stored and have unspecified values from userspace's point of view.
 *
 *    task->thread.sve_state does not need to be non-NULL, valid or any
 *    particular size: it must not be dereferenced.
 *
 *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
 *    irrespective of whether TIF_SVE is clear or set, since these are
 *    not vector length dependent.
 */

/*
 * Update current's FPSIMD/SVE registers from thread_struct.
 *
 * This function should be called only when the FPSIMD/SVE state in
 * thread_struct is known to be up to date, when preparing to enter
 * userspace.
 *
 * Softirqs (and preemption) must be disabled.
 */
static void task_fpsimd_load(void)
{
	WARN_ON(!in_softirq() && !irqs_disabled());

	if (system_supports_sve() && test_thread_flag(TIF_SVE))
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       sve_vq_from_vl(current->thread.sve_vl) - 1);
	else
		fpsimd_load_state(&current->thread.uw.fpsimd_state);
}

/*
 * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
 * date with respect to the CPU registers.
 *
 * Softirqs (and preemption) must be disabled.
 */
void fpsimd_save(void)
{
	struct fpsimd_last_state_struct const *last =
		this_cpu_ptr(&fpsimd_last_state);
	/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */

	WARN_ON(!in_softirq() && !irqs_disabled());

	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
			if (WARN_ON(sve_get_vl() != last->sve_vl)) {
				/*
				 * Can't save the user regs, so current would
				 * re-enter user with corrupt state.
				 * There's no way to recover, so kill it:
				 */
				force_signal_inject(SIGKILL, SI_KERNEL, 0);
				return;
			}

			sve_save_state((char *)last->sve_state +
						sve_ffr_offset(last->sve_vl),
				       &last->st->fpsr);
		} else
			fpsimd_save_state(last->st);
	}
}

/*
 * All vector length selection from userspace comes through here.
 * We're on a slow path, so some sanity-checks are included.
 * If things go wrong there's a bug somewhere, but try to fall back to a
 * safe choice.
 */
static unsigned int find_supported_vector_length(unsigned int vl)
{
	int bit;
	int max_vl = sve_max_vl;

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = SVE_VL_MIN;

	if (WARN_ON(!sve_vl_valid(max_vl)))
		max_vl = SVE_VL_MIN;

	if (vl > max_vl)
		vl = max_vl;

	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
			    __vq_to_bit(sve_vq_from_vl(vl)));
	return sve_vl_from_vq(__bit_to_vq(bit));
}
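
/*
 * Illustrative example (not part of the original source): __vq_to_bit() maps
 * VQ vq to bit (SVE_VQ_MAX - vq), so larger vector lengths occupy smaller
 * bit indices.  Searching upwards with find_next_bit() therefore walks
 * downwards through the vector lengths, yielding the largest supported VQ
 * not exceeding the request.  E.g. if only VQs {1, 2, 4} are supported,
 * bits {SVE_VQ_MAX - 1, SVE_VQ_MAX - 2, SVE_VQ_MAX - 4} are set; a request
 * for VQ 3 starts the search at bit (SVE_VQ_MAX - 3) and finds
 * bit (SVE_VQ_MAX - 2), i.e. VQ 2.
 */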

#ifdef CONFIG_SYSCTL

static int sve_proc_do_default_vl(struct ctl_table *table, int write,
				  void __user *buffer, size_t *lenp,
				  loff_t *ppos)
{
	int ret;
	int vl = sve_default_vl;
	struct ctl_table tmp_table = {
		.data = &vl,
		.maxlen = sizeof(vl),
	};

	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	/* Writing -1 has the special meaning "set to max": */
	if (vl == -1)
		vl = sve_max_vl;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	sve_default_vl = find_supported_vector_length(vl);
	return 0;
}

static struct ctl_table sve_default_vl_table[] = {
	{
		.procname	= "sve_default_vector_length",
		.mode		= 0644,
		.proc_handler	= sve_proc_do_default_vl,
	},
	{ }
};

static int __init sve_sysctl_init(void)
{
	if (system_supports_sve())
		if (!register_sysctl("abi", sve_default_vl_table))
			return -EINVAL;

	return 0;
}

#else /* ! CONFIG_SYSCTL */
static int __init sve_sysctl_init(void) { return 0; }
#endif /* ! CONFIG_SYSCTL */
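
/*
 * Usage note (illustrative): sve_default_vl_table is registered under "abi",
 * so the default vector length is exposed to userspace as
 * /proc/sys/abi/sve_default_vector_length.  Writing -1 selects the maximum
 * supported vector length, e.g.:
 *
 *	# echo 64 > /proc/sys/abi/sve_default_vector_length
 */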

#define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))

#ifdef CONFIG_CPU_BIG_ENDIAN
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	u64 a = swab64(x);
	u64 b = swab64(x >> 64);

	return ((__uint128_t)a << 64) | b;
}
#else
static __uint128_t arm64_cpu_to_le128(__uint128_t x)
{
	return x;
}
#endif

#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
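
/*
 * Worked example (illustrative): for vq == 2 (256-bit vectors), each Zn slot
 * in sve_state is SVE_SIG_ZREG_SIZE(2) == 32 bytes, so ZREG(sst, 2, 3) points
 * 3 * 32 bytes into the Z-register block.  Only the low 16 bytes of each slot
 * alias the corresponding FPSIMD Vn register, which is why the conversion
 * helpers below copy exactly one __uint128_t per register.
 */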

/*
 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
 * task->thread.sve_state.
 *
 * Task can be a non-runnable task, or current.  In the latter case,
 * softirqs (and preemption) must be disabled.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must be up to date before calling this
 * function.
 */
static void fpsimd_to_sve(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t *p;

	if (!system_supports_sve())
		return;

	vq = sve_vq_from_vl(task->thread.sve_vl);
	for (i = 0; i < 32; ++i) {
		p = (__uint128_t *)ZREG(sst, vq, i);
		*p = arm64_cpu_to_le128(fst->vregs[i]);
	}
}

/*
 * Transfer the SVE state in task->thread.sve_state to
 * task->thread.uw.fpsimd_state.
 *
 * Task can be a non-runnable task, or current.  In the latter case,
 * softirqs (and preemption) must be disabled.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.sve_state must be up to date before calling this function.
 */
static void sve_to_fpsimd(struct task_struct *task)
{
	unsigned int vq;
	void const *sst = task->thread.sve_state;
	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t const *p;

	if (!system_supports_sve())
		return;

	vq = sve_vq_from_vl(task->thread.sve_vl);
	for (i = 0; i < 32; ++i) {
		p = (__uint128_t const *)ZREG(sst, vq, i);
		fst->vregs[i] = arm64_le128_to_cpu(*p);
	}
}
#ifdef CONFIG_ARM64_SVE

/*
 * Return how many bytes of memory are required to store the full SVE
 * state for task, given task's currently configured vector length.
 */
size_t sve_state_size(struct task_struct const *task)
{
	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl));
}

/*
 * Ensure that task->thread.sve_state is allocated and sufficiently large.
 *
 * This function should be used only in preparation for replacing
 * task->thread.sve_state with new data.  The memory is always zeroed
 * here to prevent stale data from showing through: this is done in
 * the interest of testability and predictability, since except in the
 * do_sve_acc() case there is no ABI requirement to hide stale data
 * written previously by task.
 */
void sve_alloc(struct task_struct *task)
{
	if (task->thread.sve_state) {
		memset(task->thread.sve_state, 0, sve_state_size(task));
		return;
	}

	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
	task->thread.sve_state =
		kzalloc(sve_state_size(task), GFP_KERNEL);

	/*
	 * If future SVE revisions can have larger vectors though,
	 * this may cease to be true:
	 */
	BUG_ON(!task->thread.sve_state);
}

/*
 * Ensure that task->thread.sve_state is up to date with respect to
 * the user task, irrespective of whether SVE is in use or not.
 *
 * This should only be called by ptrace.  task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 */
void fpsimd_sync_to_sve(struct task_struct *task)
{
	if (!test_tsk_thread_flag(task, TIF_SVE))
		fpsimd_to_sve(task);
}

/*
 * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
 * the user task, irrespective of whether SVE is in use or not.
 *
 * This should only be called by ptrace.  task must be non-runnable.
 */
void sve_sync_to_fpsimd(struct task_struct *task)
{
	if (test_tsk_thread_flag(task, TIF_SVE))
		sve_to_fpsimd(task);
}

/*
 * Ensure that task->thread.sve_state is up to date with respect to
 * the task->thread.uw.fpsimd_state.
 *
 * This should only be called by ptrace to merge new FPSIMD register
 * contents into the SVE state of task.  task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must already have been initialised with
 * the new FPSIMD register contents to be merged in.
 */
void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;
	__uint128_t *p;

	if (!test_tsk_thread_flag(task, TIF_SVE))
		return;

	vq = sve_vq_from_vl(task->thread.sve_vl);

	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));

	for (i = 0; i < 32; ++i) {
		p = (__uint128_t *)ZREG(sst, vq, i);
		*p = arm64_cpu_to_le128(fst->vregs[i]);
	}
}

int sve_set_vector_length(struct task_struct *task,
			  unsigned long vl, unsigned long flags)
{
	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
				     PR_SVE_SET_VL_ONEXEC))
		return -EINVAL;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/*
	 * Clamp to the maximum vector length that VL-agnostic SVE code can
	 * work with.  A flag may be assigned in the future to allow setting
	 * of larger vector lengths without confusing older software.
	 */
	if (vl > SVE_VL_ARCH_MAX)
		vl = SVE_VL_ARCH_MAX;

	vl = find_supported_vector_length(vl);

	if (flags & (PR_SVE_VL_INHERIT |
		     PR_SVE_SET_VL_ONEXEC))
		task->thread.sve_vl_onexec = vl;
	else
		/* Reset VL to system default on next exec: */
		task->thread.sve_vl_onexec = 0;

	/* Only actually set the VL if not deferred: */
	if (flags & PR_SVE_SET_VL_ONEXEC)
		goto out;

	if (vl == task->thread.sve_vl)
		goto out;

	/*
	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
	 * write any live register state back to task_struct, and convert to a
	 * non-SVE thread.
	 */
	if (task == current) {
		local_bh_disable();

		fpsimd_save();
	}

	fpsimd_flush_task_state(task);
	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
		sve_to_fpsimd(task);

	if (task == current)
		local_bh_enable();

	/*
	 * Force reallocation of task SVE state to the correct size
	 * on next use:
	 */
	sve_free(task);

	task->thread.sve_vl = vl;

out:
	update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
			       flags & PR_SVE_VL_INHERIT);

	return 0;
}

/*
 * Encode the current vector length and flags for return.
 * This is only required for prctl(): ptrace has separate fields.
 *
 * flags are as for sve_set_vector_length().
 */
static int sve_prctl_status(unsigned long flags)
{
	int ret;

	if (flags & PR_SVE_SET_VL_ONEXEC)
		ret = current->thread.sve_vl_onexec;
	else
		ret = current->thread.sve_vl;

	if (test_thread_flag(TIF_SVE_VL_INHERIT))
		ret |= PR_SVE_VL_INHERIT;

	return ret;
}

/* PR_SVE_SET_VL */
int sve_set_current_vl(unsigned long arg)
{
	unsigned long vl, flags;
	int ret;

	vl = arg & PR_SVE_VL_LEN_MASK;
	flags = arg & ~vl;

	if (!system_supports_sve())
		return -EINVAL;

	ret = sve_set_vector_length(current, vl, flags);
	if (ret)
		return ret;

	return sve_prctl_status(flags);
}

/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
	if (!system_supports_sve())
		return -EINVAL;

	return sve_prctl_status(0);
}
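
/*
 * Usage sketch (illustrative, from userspace):
 *
 *	#include <sys/prctl.h>
 *
 *	// Ask for a 256-bit (32-byte) vector length, inherited across fork:
 *	prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT);
 *	int vl = prctl(PR_SVE_GET_VL) & PR_SVE_VL_LEN_MASK;
 *
 * The kernel clamps the request to a supported vector length, so callers
 * should read the value back rather than assume the request was honoured.
 */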

static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
	unsigned int vq, vl;
	unsigned long zcr;

	bitmap_zero(map, SVE_VQ_MAX);

	zcr = ZCR_ELx_LEN_MASK;
	zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr; /* load other bits */

	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
		write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1);
		vl = sve_get_vl();
		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
		set_bit(__vq_to_bit(vq), map);
	}
}

/*
 * Initialise the set of known supported VQs for the boot CPU.
 * This is called during kernel boot, before secondary CPUs are brought up.
 */
void __init sve_init_vq_map(void)
{
	sve_probe_vqs(sve_vq_map);
	bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
}

/*
 * If we haven't committed to the set of supported VQs yet, filter out
 * those not supported by the current CPU.
 * This function is called during the bring-up of early secondary CPUs only.
 */
void sve_update_vq_map(void)
{
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);

	sve_probe_vqs(tmp_map);
	bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
	bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
}

/*
 * Check whether the current CPU supports all VQs in the committed set.
 * This function is called during the bring-up of late secondary CPUs only.
 */
int sve_verify_vq_map(void)
{
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	sve_probe_vqs(tmp_map);

	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
		pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
			smp_processor_id());
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
		return 0;

	/*
	 * For KVM, it is necessary to ensure that this CPU doesn't
	 * support any vector length that guests may have probed as
	 * unsupported.
	 */

	/* Recover the set of supported VQs: */
	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	/* Find VQs supported that are not globally supported: */
	bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);

	/* Find the lowest such VQ, if any: */
	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		return 0; /* no mismatches */

	/*
	 * Mismatches above sve_max_virtualisable_vl are fine, since
	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
	 */
	if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) {
		pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
			smp_processor_id());
		return -EINVAL;
	}

	return 0;
}

static void __init sve_efi_setup(void)
{
	if (!IS_ENABLED(CONFIG_EFI))
		return;

	/*
	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
	 * This is evidence of a crippled system and we are returning void,
	 * so no attempt is made to handle this situation here.
	 */
	if (!sve_vl_valid(sve_max_vl))
		goto fail;

	efi_sve_state = __alloc_percpu(
		SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES);
	if (!efi_sve_state)
		goto fail;

	return;

fail:
	panic("Cannot allocate percpu memory for EFI SVE save/restore");
}

/*
 * Enable SVE for EL1.
 * Intended for use by the cpufeatures code during CPU boot.
 */
void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
	isb();
}

/*
 * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
 * vector length.
 *
 * Use only if SVE is present.
 * This function clobbers the SVE vector length.
 */
u64 read_zcr_features(void)
{
	u64 zcr;
	unsigned int vq_max;

	/*
	 * Set the maximum possible VL, and write zeroes to all other
	 * bits to see if they stick.
	 */
	sve_kernel_enable(NULL);
	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);

	zcr = read_sysreg_s(SYS_ZCR_EL1);
	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
	vq_max = sve_vq_from_vl(sve_get_vl());
	zcr |= vq_max - 1; /* set LEN field to maximum effective value */

	return zcr;
}

void __init sve_setup(void)
{
	u64 zcr;
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	if (!system_supports_sve())
		return;

	/*
	 * The SVE architecture mandates support for 128-bit vectors,
	 * so sve_vq_map must have at least SVE_VQ_MIN set.
	 * If something went wrong, at least try to patch it up:
	 */
	if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
		set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map);

	zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
	sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);

	/*
	 * Sanity-check that the max VL we determined through CPU features
	 * corresponds properly to sve_vq_map.  If not, do our best:
	 */
	if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl)))
		sve_max_vl = find_supported_vector_length(sve_max_vl);

	/*
	 * For the default VL, pick the maximum supported value <= 64.
	 * VL == 64 is a reasonable default for all current hardware.
	 */
	sve_default_vl = find_supported_vector_length(64);

	bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
		      SVE_VQ_MAX);

	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		/* No non-virtualisable VLs found */
		sve_max_virtualisable_vl = SVE_VQ_MAX;
	else if (WARN_ON(b == SVE_VQ_MAX - 1))
		/* No virtualisable VLs?  This is architecturally forbidden. */
		sve_max_virtualisable_vl = SVE_VQ_MIN;
	else /* b + 1 < SVE_VQ_MAX */
		sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));

	if (sve_max_virtualisable_vl > sve_max_vl)
		sve_max_virtualisable_vl = sve_max_vl;

	pr_info("SVE: maximum available vector length %u bytes per vector\n",
		sve_max_vl);
	pr_info("SVE: default vector length %u bytes per vector\n",
		sve_default_vl);

	/* KVM decides whether to support mismatched systems. Just warn here: */
	if (sve_max_virtualisable_vl < sve_max_vl)
		pr_warn("SVE: unvirtualisable vector lengths present\n");

	sve_efi_setup();
}

/*
 * Called from the put_task_struct() path, which cannot get here
 * unless dead_task is really dead and not schedulable.
 */
void fpsimd_release_task(struct task_struct *dead_task)
{
	__sve_free(dead_task);
}

#endif /* CONFIG_ARM64_SVE */

/*
 * Trapped SVE access
 *
 * Storage is allocated for the full SVE state, the current FPSIMD
 * register contents are migrated across, and TIF_SVE is set so that
 * the SVE access trap will be disabled the next time this task
 * reaches ret_to_user.
 *
 * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
 * would have disabled the SVE access trap for userspace during
 * ret_to_user, making an SVE access trap impossible in that case.
 */
asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SVE, the hardware could still trap: */
	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
		return;
	}

	sve_alloc(current);

	local_bh_disable();

	fpsimd_save();

	/* Force ret_to_user to reload the registers: */
	fpsimd_flush_task_state(current);

	fpsimd_to_sve(current);
	if (test_and_set_thread_flag(TIF_SVE))
		WARN_ON(1); /* SVE access shouldn't have trapped */

	local_bh_enable();
}

/*
 * Trapped FP/ASIMD access.
 */
asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
	/* TODO: implement lazy context saving/restoring */
	WARN_ON(1);
}

/*
 * Raise a SIGFPE for the current process.
 */
asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
	unsigned int si_code = FPE_FLTUNK;

	if (esr & ESR_ELx_FP_EXC_TFV) {
		if (esr & FPEXC_IOF)
			si_code = FPE_FLTINV;
		else if (esr & FPEXC_DZF)
			si_code = FPE_FLTDIV;
		else if (esr & FPEXC_OFF)
			si_code = FPE_FLTOVF;
		else if (esr & FPEXC_UFF)
			si_code = FPE_FLTUND;
		else if (esr & FPEXC_IXF)
			si_code = FPE_FLTRES;
	}

	send_sig_fault(SIGFPE, si_code,
		       (void __user *)instruction_pointer(regs),
		       current);
}

void fpsimd_thread_switch(struct task_struct *next)
{
	bool wrong_task, wrong_cpu;

	if (!system_supports_fpsimd())
		return;

	/* Save unsaved fpsimd state, if any: */
	fpsimd_save();

	/*
	 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
	 * state.  For kernel threads, FPSIMD registers are never loaded
	 * and therefore TIF_FOREIGN_FPSTATE will always be set.
	 */
	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
					&next->thread.uw.fpsimd_state;
	wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();

	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
			       wrong_task || wrong_cpu);
}

void fpsimd_flush_thread(void)
{
	int vl, supported_vl;

	if (!system_supports_fpsimd())
		return;

	local_bh_disable();

	fpsimd_flush_task_state(current);
	memset(&current->thread.uw.fpsimd_state, 0,
	       sizeof(current->thread.uw.fpsimd_state));

	if (system_supports_sve()) {
		clear_thread_flag(TIF_SVE);
		sve_free(current);

		/*
		 * Reset the task vector length as required.
		 * This is where we ensure that all user tasks have a valid
		 * vector length configured: no kernel task can become a user
		 * task without an exec and hence a call to this function.
		 * By the time the first call to this function is made, all
		 * early hardware probing is complete, so sve_default_vl
		 * should be valid.
		 * If a bug causes this to go wrong, we make some noise and
		 * try to fudge thread.sve_vl to a safe value here.
		 */
		vl = current->thread.sve_vl_onexec ?
			current->thread.sve_vl_onexec : sve_default_vl;

		if (WARN_ON(!sve_vl_valid(vl)))
			vl = SVE_VL_MIN;

		supported_vl = find_supported_vector_length(vl);
		if (WARN_ON(supported_vl != vl))
			vl = supported_vl;

		current->thread.sve_vl = vl;

		/*
		 * If the task is not set to inherit, ensure that the vector
		 * length will be reset by a subsequent exec:
		 */
		if (!test_thread_flag(TIF_SVE_VL_INHERIT))
			current->thread.sve_vl_onexec = 0;
	}

	local_bh_enable();
}

/*
 * Save the userland FPSIMD state of 'current' to memory, but only if the
 * state currently held in the registers does in fact belong to 'current'.
 */
void fpsimd_preserve_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	local_bh_disable();
	fpsimd_save();
	local_bh_enable();
}

/*
 * Like fpsimd_preserve_current_state(), but ensure that
 * current->thread.uw.fpsimd_state is updated so that it can be copied to
 * the signal frame.
 */
void fpsimd_signal_preserve_current_state(void)
{
	fpsimd_preserve_current_state();
	if (system_supports_sve() && test_thread_flag(TIF_SVE))
		sve_to_fpsimd(current);
}

/*
 * Associate current's FPSIMD context with this cpu.
 * Preemption must be disabled when calling this function.
 */
void fpsimd_bind_task_to_cpu(void)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);

	last->st = &current->thread.uw.fpsimd_state;
	last->sve_state = current->thread.sve_state;
	last->sve_vl = current->thread.sve_vl;
	current->thread.fpsimd_cpu = smp_processor_id();

	if (system_supports_sve()) {
		/* Toggle SVE trapping for userspace if needed */
		if (test_thread_flag(TIF_SVE))
			sve_user_enable();
		else
			sve_user_disable();

		/* Serialised by exception return to user */
	}
}

void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
			      unsigned int sve_vl)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!in_softirq() && !irqs_disabled());

	last->st = st;
	last->sve_state = sve_state;
	last->sve_vl = sve_vl;
}

/*
 * Load the userland FPSIMD state of 'current' from memory, but only if the
 * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
 * state of 'current'.
 */
void fpsimd_restore_current_state(void)
{
	if (!system_supports_fpsimd())
		return;

	local_bh_disable();

	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		task_fpsimd_load();
		fpsimd_bind_task_to_cpu();
	}

	local_bh_enable();
}

/*
 * Load an updated userland FPSIMD state for 'current' from memory and set the
 * flag that indicates that the FPSIMD register contents are the most recent
 * FPSIMD state of 'current'.
 */
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
	if (!system_supports_fpsimd())
		return;

	local_bh_disable();

	current->thread.uw.fpsimd_state = *state;
	if (system_supports_sve() && test_thread_flag(TIF_SVE))
		fpsimd_to_sve(current);

	task_fpsimd_load();
	fpsimd_bind_task_to_cpu();

	clear_thread_flag(TIF_FOREIGN_FPSTATE);

	local_bh_enable();
}

/*
 * Invalidate live CPU copies of task t's FPSIMD state.
 *
 * This function may be called with preemption enabled.  The first barrier()
 * ensures that the assignment to fpsimd_cpu is visible to any
 * preemption/softirq that could race with set_tsk_thread_flag(), so that
 * TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
 *
 * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
 * subsequent code.
 */
void fpsimd_flush_task_state(struct task_struct *t)
{
	t->thread.fpsimd_cpu = NR_CPUS;

	barrier();
	set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);

	barrier();
}

/*
 * Invalidate any task's FPSIMD state that is present on this cpu.
 * This function must be called with softirqs disabled.
 */
void fpsimd_flush_cpu_state(void)
{
	__this_cpu_write(fpsimd_last_state.st, NULL);
	set_thread_flag(TIF_FOREIGN_FPSTATE);
}

#ifdef CONFIG_KERNEL_MODE_NEON

DEFINE_PER_CPU(bool, kernel_neon_busy);
EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);

/*
 * Kernel-side NEON support functions
 */

/*
 * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling
 * context
 *
 * Must not be called unless may_use_simd() returns true.
 * Task context in the FPSIMD registers is saved back to memory as necessary.
 *
 * A matching call to kernel_neon_end() must be made before returning from the
 * calling context.
 *
 * The caller may freely use the FPSIMD registers until kernel_neon_end() is
 * called.
 */
void kernel_neon_begin(void)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	BUG_ON(!may_use_simd());

	local_bh_disable();

	__this_cpu_write(kernel_neon_busy, true);

	/* Save unsaved fpsimd state, if any: */
	fpsimd_save();

	/* Invalidate any task state remaining in the fpsimd regs: */
	fpsimd_flush_cpu_state();

	preempt_disable();

	local_bh_enable();
}
EXPORT_SYMBOL(kernel_neon_begin);
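
/*
 * Usage sketch (illustrative only): kernel code wanting to use FP/SIMD
 * typically brackets the work as below, keeping a scalar fallback for
 * contexts where SIMD is unusable.  The do_work_*() routines are
 * hypothetical stand-ins, not functions defined in this file:
 *
 *	if (may_use_simd()) {
 *		kernel_neon_begin();
 *		do_work_simd();
 *		kernel_neon_end();
 *	} else {
 *		do_work_scalar();
 *	}
 */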

/*
 * kernel_neon_end(): give the CPU FPSIMD registers back to the current task
 *
 * Must be called from a context in which kernel_neon_begin() was previously
 * called, with no call to kernel_neon_end() in the meantime.
 *
 * The caller must not use the FPSIMD registers after this function is called,
 * unless kernel_neon_begin() is called again in the meantime.
 */
void kernel_neon_end(void)
{
	bool busy;

	if (!system_supports_fpsimd())
		return;

	busy = __this_cpu_xchg(kernel_neon_busy, false);
	WARN_ON(!busy);	/* No matching kernel_neon_begin()? */

	preempt_enable();
}
EXPORT_SYMBOL(kernel_neon_end);

#ifdef CONFIG_EFI

static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
static DEFINE_PER_CPU(bool, efi_sve_state_used);

/*
 * EFI runtime services support functions
 *
 * The ABI for EFI runtime services allows EFI to use FPSIMD during the call.
 * EFI runtime services may be invoked from contexts where kernel_neon_begin()
 * is unusable (e.g. with softirqs disabled), so a dedicated per-cpu
 * save/restore area is maintained here for that case.
 *
 * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services
 * call.
 */
void __efi_fpsimd_begin(void)
{
	if (!system_supports_fpsimd())
		return;

	WARN_ON(preemptible());

	if (may_use_simd()) {
		kernel_neon_begin();
	} else {
		/*
		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
		 * preserving:
		 */
		if (system_supports_sve() && likely(efi_sve_state)) {
			char *sve_state = this_cpu_ptr(efi_sve_state);

			__this_cpu_write(efi_sve_state_used, true);

			sve_save_state(sve_state + sve_ffr_offset(sve_max_vl),
				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr);
		} else {
			fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state));
		}

		__this_cpu_write(efi_fpsimd_state_used, true);
	}
}

/*
 * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call.
 */
void __efi_fpsimd_end(void)
{
	if (!system_supports_fpsimd())
		return;

	if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) {
		kernel_neon_end();
	} else {
		if (system_supports_sve() &&
		    likely(__this_cpu_read(efi_sve_state_used))) {
			char const *sve_state = this_cpu_ptr(efi_sve_state);

			sve_load_state(sve_state + sve_ffr_offset(sve_max_vl),
				       &this_cpu_ptr(&efi_fpsimd_state)->fpsr,
				       sve_vq_from_vl(sve_get_vl()) - 1);

			__this_cpu_write(efi_sve_state_used, false);
		} else {
			fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state));
		}
	}
}

#endif /* CONFIG_EFI */

#endif /* CONFIG_KERNEL_MODE_NEON */

#ifdef CONFIG_CPU_PM
static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
				  unsigned long cmd, void *v)
{
	switch (cmd) {
	case CPU_PM_ENTER:
		fpsimd_save();
		fpsimd_flush_cpu_state();
		break;
	case CPU_PM_EXIT:
		break;
	case CPU_PM_ENTER_FAILED:
	default:
		return NOTIFY_DONE;
	}
	return NOTIFY_OK;
}

static struct notifier_block fpsimd_cpu_pm_notifier_block = {
	.notifier_call = fpsimd_cpu_pm_notifier,
};

static void __init fpsimd_pm_init(void)
{
	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
}

#else
static inline void fpsimd_pm_init(void) { }
#endif /* CONFIG_CPU_PM */

#ifdef CONFIG_HOTPLUG_CPU
static int fpsimd_cpu_dead(unsigned int cpu)
{
	per_cpu(fpsimd_last_state.st, cpu) = NULL;
	return 0;
}

static inline void fpsimd_hotplug_init(void)
{
	cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
				  NULL, fpsimd_cpu_dead);
}

#else
static inline void fpsimd_hotplug_init(void) { }
#endif

/*
 * FP/SIMD support code initialisation.
 */
static int __init fpsimd_init(void)
{
	if (cpu_have_named_feature(FP)) {
		fpsimd_pm_init();
		fpsimd_hotplug_init();
	} else {
		pr_notice("Floating-point is not implemented\n");
	}

	if (!cpu_have_named_feature(ASIMD))
		pr_notice("Advanced SIMD is not implemented\n");

	return sve_sysctl_init();
}
core_initcall(fpsimd_init);