1
2
3
4
5
6
7
8
9#include <linux/sysctl.h>
10#include <linux/slab.h>
11#include <linux/mm.h>
12#include <asm/mmu_context.h>
13#include <asm/pgalloc.h>
14#include <asm/gmap.h>
15#include <asm/tlb.h>
16#include <asm/tlbflush.h>
17
18#ifdef CONFIG_PGSTE
19
/*
 * Value and bounds behind the "allocate_pgste" sysctl entry. The _min and
 * _max variables are handed to the entry's handler as extra1/extra2.
 * Objects with static storage duration start out as zero, so only the
 * upper bound needs an explicit initialiser.
 */
static int page_table_allocate_pgste_min;
static int page_table_allocate_pgste_max = 1;
int page_table_allocate_pgste;
EXPORT_SYMBOL(page_table_allocate_pgste);
24
25static struct ctl_table page_table_sysctl[] = {
26 {
27 .procname = "allocate_pgste",
28 .data = &page_table_allocate_pgste,
29 .maxlen = sizeof(int),
30 .mode = S_IRUGO | S_IWUSR,
31 .proc_handler = proc_dointvec_minmax,
32 .extra1 = &page_table_allocate_pgste_min,
33 .extra2 = &page_table_allocate_pgste_max,
34 },
35 { }
36};
37
38static struct ctl_table page_table_sysctl_dir[] = {
39 {
40 .procname = "vm",
41 .maxlen = 0,
42 .mode = 0555,
43 .child = page_table_sysctl,
44 },
45 { }
46};
47
48static int __init page_table_register_sysctl(void)
49{
50 return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
51}
52__initcall(page_table_register_sysctl);
53
54#endif
55
56unsigned long *crst_table_alloc(struct mm_struct *mm)
57{
58 struct page *page = alloc_pages(GFP_KERNEL, 2);
59
60 if (!page)
61 return NULL;
62 arch_set_page_dat(page, 2);
63 return (unsigned long *) page_to_phys(page);
64}
65
/*
 * Release a table obtained from crst_table_alloc(); the order (2) matches
 * the allocation. @mm is not used by the body.
 */
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long addr = (unsigned long) table;

	free_pages(addr, 2);
}
70
71static void __crst_table_upgrade(void *arg)
72{
73 struct mm_struct *mm = arg;
74
75 if (current->active_mm == mm)
76 set_user_asce(mm);
77 __tlb_flush_local();
78}
79
80int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
81{
82 unsigned long *table, *pgd;
83 int rc, notify;
84
85
86 VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
87 rc = 0;
88 notify = 0;
89 while (mm->context.asce_limit < end) {
90 table = crst_table_alloc(mm);
91 if (!table) {
92 rc = -ENOMEM;
93 break;
94 }
95 spin_lock_bh(&mm->page_table_lock);
96 pgd = (unsigned long *) mm->pgd;
97 if (mm->context.asce_limit == _REGION2_SIZE) {
98 crst_table_init(table, _REGION2_ENTRY_EMPTY);
99 p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
100 mm->pgd = (pgd_t *) table;
101 mm->context.asce_limit = _REGION1_SIZE;
102 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
103 _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
104 mm_inc_nr_puds(mm);
105 } else {
106 crst_table_init(table, _REGION1_ENTRY_EMPTY);
107 pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
108 mm->pgd = (pgd_t *) table;
109 mm->context.asce_limit = -PAGE_SIZE;
110 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
111 _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
112 }
113 notify = 1;
114 spin_unlock_bh(&mm->page_table_lock);
115 }
116 if (notify)
117 on_each_cpu(__crst_table_upgrade, mm, 0);
118 return rc;
119}
120
121void crst_table_downgrade(struct mm_struct *mm)
122{
123 pgd_t *pgd;
124
125
126 VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
127
128 if (current->active_mm == mm) {
129 clear_user_asce();
130 __tlb_flush_mm(mm);
131 }
132
133 pgd = mm->pgd;
134 mm_dec_nr_pmds(mm);
135 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
136 mm->context.asce_limit = _REGION3_SIZE;
137 mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
138 _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
139 crst_table_free(mm, (unsigned long *) pgd);
140
141 if (current->active_mm == mm)
142 set_user_asce(mm);
143}
144
145static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
146{
147 unsigned int old, new;
148
149 do {
150 old = atomic_read(v);
151 new = old ^ bits;
152 } while (atomic_cmpxchg(v, old, new) != old);
153 return new;
154}
155
156#ifdef CONFIG_PGSTE
157
158struct page *page_table_alloc_pgste(struct mm_struct *mm)
159{
160 struct page *page;
161 u64 *table;
162
163 page = alloc_page(GFP_KERNEL);
164 if (page) {
165 table = (u64 *)page_to_phys(page);
166 memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
167 memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
168 }
169 return page;
170}
171
/* Give back a page obtained from page_table_alloc_pgste(). */
void page_table_free_pgste(struct page *page)
{
	__free_pages(page, 0);
}
176
177#endif
178
179
180
181
182unsigned long *page_table_alloc(struct mm_struct *mm)
183{
184 unsigned long *table;
185 struct page *page;
186 unsigned int mask, bit;
187
188
189 if (!mm_alloc_pgste(mm)) {
190 table = NULL;
191 spin_lock_bh(&mm->context.lock);
192 if (!list_empty(&mm->context.pgtable_list)) {
193 page = list_first_entry(&mm->context.pgtable_list,
194 struct page, lru);
195 mask = atomic_read(&page->_refcount) >> 24;
196 mask = (mask | (mask >> 4)) & 3;
197 if (mask != 3) {
198 table = (unsigned long *) page_to_phys(page);
199 bit = mask & 1;
200 if (bit)
201 table += PTRS_PER_PTE;
202 atomic_xor_bits(&page->_refcount,
203 1U << (bit + 24));
204 list_del(&page->lru);
205 }
206 }
207 spin_unlock_bh(&mm->context.lock);
208 if (table)
209 return table;
210 }
211
212 page = alloc_page(GFP_KERNEL);
213 if (!page)
214 return NULL;
215 if (!pgtable_page_ctor(page)) {
216 __free_page(page);
217 return NULL;
218 }
219 arch_set_page_dat(page, 0);
220
221 table = (unsigned long *) page_to_phys(page);
222 if (mm_alloc_pgste(mm)) {
223
224 atomic_xor_bits(&page->_refcount, 3 << 24);
225 memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
226 memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
227 } else {
228
229 atomic_xor_bits(&page->_refcount, 1 << 24);
230 memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
231 spin_lock_bh(&mm->context.lock);
232 list_add(&page->lru, &mm->context.pgtable_list);
233 spin_unlock_bh(&mm->context.lock);
234 }
235 return table;
236}
237
238void page_table_free(struct mm_struct *mm, unsigned long *table)
239{
240 struct page *page;
241 unsigned int bit, mask;
242
243 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
244 if (!mm_alloc_pgste(mm)) {
245
246 bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
247 spin_lock_bh(&mm->context.lock);
248 mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
249 mask >>= 24;
250 if (mask & 3)
251 list_add(&page->lru, &mm->context.pgtable_list);
252 else
253 list_del(&page->lru);
254 spin_unlock_bh(&mm->context.lock);
255 if (mask != 0)
256 return;
257 } else {
258 atomic_xor_bits(&page->_refcount, 3U << 24);
259 }
260
261 pgtable_page_dtor(page);
262 __free_page(page);
263}
264
265void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
266 unsigned long vmaddr)
267{
268 struct mm_struct *mm;
269 struct page *page;
270 unsigned int bit, mask;
271
272 mm = tlb->mm;
273 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
274 if (mm_alloc_pgste(mm)) {
275 gmap_unlink(mm, table, vmaddr);
276 table = (unsigned long *) (__pa(table) | 3);
277 tlb_remove_table(tlb, table);
278 return;
279 }
280 bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
281 spin_lock_bh(&mm->context.lock);
282 mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
283 mask >>= 24;
284 if (mask & 3)
285 list_add_tail(&page->lru, &mm->context.pgtable_list);
286 else
287 list_del(&page->lru);
288 spin_unlock_bh(&mm->context.lock);
289 table = (unsigned long *) (__pa(table) | (1U << bit));
290 tlb_remove_table(tlb, table);
291}
292
293void __tlb_remove_table(void *_table)
294{
295 unsigned int mask = (unsigned long) _table & 3;
296 void *table = (void *)((unsigned long) _table ^ mask);
297 struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
298
299 switch (mask) {
300 case 0:
301 free_pages((unsigned long) table, 2);
302 break;
303 case 1:
304 case 2:
305 mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
306 mask >>= 24;
307 if (mask != 0)
308 break;
309
310 case 3:
311 if (mask & 3)
312 atomic_xor_bits(&page->_refcount, 3 << 24);
313 pgtable_page_dtor(page);
314 __free_page(page);
315 break;
316 }
317}
318
319
320
321
322
323
324static struct kmem_cache *base_pgt_cache;
325
326static unsigned long base_pgt_alloc(void)
327{
328 u64 *table;
329
330 table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
331 if (table)
332 memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
333 return (unsigned long) table;
334}
335
336static void base_pgt_free(unsigned long table)
337{
338 kmem_cache_free(base_pgt_cache, (void *) table);
339}
340
341static unsigned long base_crst_alloc(unsigned long val)
342{
343 unsigned long table;
344
345 table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
346 if (table)
347 crst_table_init((unsigned long *)table, val);
348 return table;
349}
350
351static void base_crst_free(unsigned long table)
352{
353 free_pages(table, CRST_ALLOC_ORDER);
354}
355
356#define BASE_ADDR_END_FUNC(NAME, SIZE) \
357static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \
358 unsigned long end) \
359{ \
360 unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1); \
361 \
362 return (next - 1) < (end - 1) ? next : end; \
363}
364
365BASE_ADDR_END_FUNC(page, _PAGE_SIZE)
366BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
367BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
368BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
369BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
370
371static inline unsigned long base_lra(unsigned long address)
372{
373 unsigned long real;
374
375 asm volatile(
376 " lra %0,0(%1)\n"
377 : "=d" (real) : "a" (address) : "cc");
378 return real;
379}
380
381static int base_page_walk(unsigned long origin, unsigned long addr,
382 unsigned long end, int alloc)
383{
384 unsigned long *pte, next;
385
386 if (!alloc)
387 return 0;
388 pte = (unsigned long *) origin;
389 pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
390 do {
391 next = base_page_addr_end(addr, end);
392 *pte = base_lra(addr);
393 } while (pte++, addr = next, addr < end);
394 return 0;
395}
396
397static int base_segment_walk(unsigned long origin, unsigned long addr,
398 unsigned long end, int alloc)
399{
400 unsigned long *ste, next, table;
401 int rc;
402
403 ste = (unsigned long *) origin;
404 ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
405 do {
406 next = base_segment_addr_end(addr, end);
407 if (*ste & _SEGMENT_ENTRY_INVALID) {
408 if (!alloc)
409 continue;
410 table = base_pgt_alloc();
411 if (!table)
412 return -ENOMEM;
413 *ste = table | _SEGMENT_ENTRY;
414 }
415 table = *ste & _SEGMENT_ENTRY_ORIGIN;
416 rc = base_page_walk(table, addr, next, alloc);
417 if (rc)
418 return rc;
419 if (!alloc)
420 base_pgt_free(table);
421 cond_resched();
422 } while (ste++, addr = next, addr < end);
423 return 0;
424}
425
426static int base_region3_walk(unsigned long origin, unsigned long addr,
427 unsigned long end, int alloc)
428{
429 unsigned long *rtte, next, table;
430 int rc;
431
432 rtte = (unsigned long *) origin;
433 rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
434 do {
435 next = base_region3_addr_end(addr, end);
436 if (*rtte & _REGION_ENTRY_INVALID) {
437 if (!alloc)
438 continue;
439 table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
440 if (!table)
441 return -ENOMEM;
442 *rtte = table | _REGION3_ENTRY;
443 }
444 table = *rtte & _REGION_ENTRY_ORIGIN;
445 rc = base_segment_walk(table, addr, next, alloc);
446 if (rc)
447 return rc;
448 if (!alloc)
449 base_crst_free(table);
450 } while (rtte++, addr = next, addr < end);
451 return 0;
452}
453
454static int base_region2_walk(unsigned long origin, unsigned long addr,
455 unsigned long end, int alloc)
456{
457 unsigned long *rste, next, table;
458 int rc;
459
460 rste = (unsigned long *) origin;
461 rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
462 do {
463 next = base_region2_addr_end(addr, end);
464 if (*rste & _REGION_ENTRY_INVALID) {
465 if (!alloc)
466 continue;
467 table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
468 if (!table)
469 return -ENOMEM;
470 *rste = table | _REGION2_ENTRY;
471 }
472 table = *rste & _REGION_ENTRY_ORIGIN;
473 rc = base_region3_walk(table, addr, next, alloc);
474 if (rc)
475 return rc;
476 if (!alloc)
477 base_crst_free(table);
478 } while (rste++, addr = next, addr < end);
479 return 0;
480}
481
482static int base_region1_walk(unsigned long origin, unsigned long addr,
483 unsigned long end, int alloc)
484{
485 unsigned long *rfte, next, table;
486 int rc;
487
488 rfte = (unsigned long *) origin;
489 rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
490 do {
491 next = base_region1_addr_end(addr, end);
492 if (*rfte & _REGION_ENTRY_INVALID) {
493 if (!alloc)
494 continue;
495 table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
496 if (!table)
497 return -ENOMEM;
498 *rfte = table | _REGION1_ENTRY;
499 }
500 table = *rfte & _REGION_ENTRY_ORIGIN;
501 rc = base_region2_walk(table, addr, next, alloc);
502 if (rc)
503 return rc;
504 if (!alloc)
505 base_crst_free(table);
506 } while (rfte++, addr = next, addr < end);
507 return 0;
508}
509
510
511
512
513
514
515
516
517void base_asce_free(unsigned long asce)
518{
519 unsigned long table = asce & _ASCE_ORIGIN;
520
521 if (!asce)
522 return;
523 switch (asce & _ASCE_TYPE_MASK) {
524 case _ASCE_TYPE_SEGMENT:
525 base_segment_walk(table, 0, _REGION3_SIZE, 0);
526 break;
527 case _ASCE_TYPE_REGION3:
528 base_region3_walk(table, 0, _REGION2_SIZE, 0);
529 break;
530 case _ASCE_TYPE_REGION2:
531 base_region2_walk(table, 0, _REGION1_SIZE, 0);
532 break;
533 case _ASCE_TYPE_REGION1:
534 base_region1_walk(table, 0, -_PAGE_SIZE, 0);
535 break;
536 }
537 base_crst_free(table);
538}
539
540static int base_pgt_cache_init(void)
541{
542 static DEFINE_MUTEX(base_pgt_cache_mutex);
543 unsigned long sz = _PAGE_TABLE_SIZE;
544
545 if (base_pgt_cache)
546 return 0;
547 mutex_lock(&base_pgt_cache_mutex);
548 if (!base_pgt_cache)
549 base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
550 mutex_unlock(&base_pgt_cache_mutex);
551 return base_pgt_cache ? 0 : -ENOMEM;
552}
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
570{
571 unsigned long asce, table, end;
572 int rc;
573
574 if (base_pgt_cache_init())
575 return 0;
576 end = addr + num_pages * PAGE_SIZE;
577 if (end <= _REGION3_SIZE) {
578 table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
579 if (!table)
580 return 0;
581 rc = base_segment_walk(table, addr, end, 1);
582 asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
583 } else if (end <= _REGION2_SIZE) {
584 table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
585 if (!table)
586 return 0;
587 rc = base_region3_walk(table, addr, end, 1);
588 asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
589 } else if (end <= _REGION1_SIZE) {
590 table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
591 if (!table)
592 return 0;
593 rc = base_region2_walk(table, addr, end, 1);
594 asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
595 } else {
596 table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
597 if (!table)
598 return 0;
599 rc = base_region1_walk(table, addr, end, 1);
600 asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
601 }
602 if (rc) {
603 base_asce_free(asce);
604 asce = 0;
605 }
606 return asce;
607}
608